midlajvalappil commited on
Commit
c597d59
·
verified ·
1 Parent(s): b3783bc

Upload 24 files

Browse files
src/src/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Sign Language Detector Package
src/src/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (163 Bytes). View file
 
src/src/__pycache__/camera_handler.cpython-310.pyc ADDED
Binary file (8.47 kB). View file
 
src/src/__pycache__/export_utils.cpython-310.pyc ADDED
Binary file (10.3 kB). View file
 
src/src/__pycache__/fallback_classifier.cpython-310.pyc ADDED
Binary file (7.43 kB). View file
 
src/src/__pycache__/file_handler.cpython-310.pyc ADDED
Binary file (13.6 kB). View file
 
src/src/__pycache__/gemini_classifier.cpython-310.pyc ADDED
Binary file (10.4 kB). View file
 
src/src/__pycache__/gesture_extractor.cpython-310.pyc ADDED
Binary file (7.4 kB). View file
 
src/src/__pycache__/hand_detector.cpython-310.pyc ADDED
Binary file (5.89 kB). View file
 
src/src/__pycache__/openai_classifier.cpython-310.pyc ADDED
Binary file (11 kB). View file
 
src/src/__pycache__/output_handler.cpython-310.pyc ADDED
Binary file (10.7 kB). View file
 
src/src/__pycache__/prediction_logger.cpython-310.pyc ADDED
Binary file (9.71 kB). View file
 
src/src/__pycache__/visualization_utils.cpython-310.pyc ADDED
Binary file (9.67 kB). View file
 
src/src/camera_handler.py ADDED
@@ -0,0 +1,306 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Real-time Camera Input Handler for Sign Language Detection
3
+ """
4
+
5
+ import cv2
6
+ import numpy as np
7
+ import time
8
+ import threading
9
+ from typing import Optional, Callable, Dict, Any, List
10
+ from queue import Queue, Empty
11
+
12
+ from .hand_detector import HandDetector
13
+ from .gesture_extractor import GestureExtractor
14
+ from .openai_classifier import SignLanguageClassifier
15
+
16
+
17
class CameraHandler:
    """
    Handles real-time camera input for sign language detection.

    Runs two daemon threads once started: a capture loop that reads frames,
    runs hand detection, and feeds a bounded frame queue; and a detection
    loop that drains that queue and classifies gestures at most once per
    ``detection_interval`` seconds.
    """

    def __init__(self,
                 camera_index: int = 0,
                 frame_width: int = 640,
                 frame_height: int = 480,
                 fps: int = 30,
                 detection_interval: float = 2.0):
        """
        Initialize the CameraHandler.

        Args:
            camera_index: Index of the camera to use
            frame_width: Width of the camera frame
            frame_height: Height of the camera frame
            fps: Frames per second for camera capture
            detection_interval: Seconds between gesture classifications
        """
        self.camera_index = camera_index
        self.frame_width = frame_width
        self.frame_height = frame_height
        self.fps = fps
        self.detection_interval = detection_interval

        # Initialize components
        self.hand_detector = HandDetector()
        self.gesture_extractor = GestureExtractor()
        self.classifier = None  # Will be initialized when needed

        # Camera and threading
        self.cap = None
        self.is_running = False
        self.capture_thread = None
        self.detection_thread = None

        # Frame and detection queues.
        # maxsize bounds memory; the capture loop silently drops frames
        # when the queue is full (see _capture_loop).
        # NOTE(review): detection_queue is never read or written anywhere
        # in this class — it looks vestigial; confirm before removing.
        self.frame_queue = Queue(maxsize=10)
        self.detection_queue = Queue(maxsize=5)

        # Callbacks (set via set_callbacks)
        self.on_frame_callback = None
        self.on_detection_callback = None

        # Detection state
        self.last_detection_time = 0
        self.gesture_history = []
        self.max_history_length = 10

    def initialize_camera(self) -> bool:
        """
        Initialize the camera.

        Returns:
            True if camera initialized successfully, False otherwise
        """
        try:
            self.cap = cv2.VideoCapture(self.camera_index)

            if not self.cap.isOpened():
                print(f"Error: Could not open camera {self.camera_index}")
                return False

            # Set camera properties (requests only — the driver may ignore
            # unsupported resolutions/frame rates)
            self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, self.frame_width)
            self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, self.frame_height)
            self.cap.set(cv2.CAP_PROP_FPS, self.fps)

            print(f"Camera initialized: {self.frame_width}x{self.frame_height} @ {self.fps}fps")
            return True

        except Exception as e:
            print(f"Error initializing camera: {e}")
            return False

    def initialize_classifier(self, api_key: Optional[str] = None) -> bool:
        """
        Initialize the OpenAI classifier.

        Args:
            api_key: OpenAI API key

        Returns:
            True if classifier initialized successfully, False otherwise
        """
        try:
            self.classifier = SignLanguageClassifier(api_key=api_key)
            print("OpenAI classifier initialized")
            return True
        except Exception as e:
            print(f"Error initializing classifier: {e}")
            return False

    def set_callbacks(self,
                      on_frame: Optional[Callable] = None,
                      on_detection: Optional[Callable] = None):
        """
        Set callback functions for frame and detection events.

        Note: on_frame is invoked from the capture thread and on_detection
        from the detection thread, not from the caller's thread.

        Args:
            on_frame: Callback for each processed frame
            on_detection: Callback for gesture detections
        """
        self.on_frame_callback = on_frame
        self.on_detection_callback = on_detection

    def start_capture(self) -> bool:
        """
        Start the camera capture and detection threads.

        Returns:
            True if started successfully, False otherwise
        """
        if not self.cap or not self.cap.isOpened():
            print("Camera not initialized")
            return False

        self.is_running = True

        # Start capture thread (daemon so it can't block interpreter exit)
        self.capture_thread = threading.Thread(target=self._capture_loop, daemon=True)
        self.capture_thread.start()

        # Start detection thread
        self.detection_thread = threading.Thread(target=self._detection_loop, daemon=True)
        self.detection_thread.start()

        print("Camera capture started")
        return True

    def stop_capture(self):
        """Stop the camera capture and detection threads."""
        # Flag is polled by both loops; joins are bounded so a stuck camera
        # read cannot hang shutdown indefinitely.
        self.is_running = False

        if self.capture_thread:
            self.capture_thread.join(timeout=2.0)

        if self.detection_thread:
            self.detection_thread.join(timeout=2.0)

        if self.cap:
            self.cap.release()

        print("Camera capture stopped")

    def _capture_loop(self):
        """Main camera capture loop (runs in separate thread)."""
        while self.is_running:
            ret, frame = self.cap.read()

            if not ret:
                print("Error reading frame from camera")
                break

            # Flip frame horizontally for mirror effect
            frame = cv2.flip(frame, 1)

            # Detect hands
            annotated_frame, hand_landmarks = self.hand_detector.detect_hands(frame)

            # Add frame to queue for detection processing.
            # If the detection thread is behind, the frame is dropped —
            # real-time display takes priority over classifying every frame.
            if not self.frame_queue.full():
                self.frame_queue.put((frame.copy(), hand_landmarks))

            # Call frame callback if set
            if self.on_frame_callback:
                self.on_frame_callback(annotated_frame, hand_landmarks)

            # Small delay to control frame rate
            # NOTE(review): this sleeps 1/fps on top of the time spent in
            # read/detection, so effective FPS is below the target — confirm
            # whether that is acceptable.
            time.sleep(1.0 / self.fps)

    def _detection_loop(self):
        """Gesture detection and classification loop (runs in separate thread)."""
        while self.is_running:
            try:
                # Get frame from queue (timeout keeps the loop responsive
                # to is_running going False)
                frame, hand_landmarks = self.frame_queue.get(timeout=1.0)

                # Check if enough time has passed since last detection.
                # Frames arriving inside the interval are consumed and
                # discarded, which keeps the queue drained.
                current_time = time.time()
                if current_time - self.last_detection_time < self.detection_interval:
                    continue

                # Process gestures if hands detected
                if hand_landmarks and self.classifier:
                    self._process_gestures(hand_landmarks)
                    self.last_detection_time = current_time

            except Empty:
                # No frame within the timeout — loop again and re-check is_running
                continue
            except Exception as e:
                print(f"Error in detection loop: {e}")

    def _process_gestures(self, hand_landmarks: List[Dict[str, Any]]):
        """
        Process detected hand landmarks and classify gestures.

        Successful classifications are appended to gesture_history (bounded
        to max_history_length) and delivered via on_detection_callback.

        Args:
            hand_landmarks: List of detected hand landmarks
        """
        detections = []

        for hand_data in hand_landmarks:
            try:
                # Extract gesture features
                gesture_description = self.gesture_extractor.create_gesture_description(hand_data)

                # Classify gesture
                classification = self.classifier.classify_gesture(gesture_description)

                if classification['success']:
                    detection = {
                        'hand_label': hand_data['label'],
                        'gesture_description': gesture_description,
                        'classification': classification,
                        'timestamp': time.time()
                    }
                    detections.append(detection)

                    # Add to gesture history (FIFO, bounded)
                    self.gesture_history.append(detection)
                    if len(self.gesture_history) > self.max_history_length:
                        self.gesture_history.pop(0)

            except Exception as e:
                # One failed hand must not abort the others
                print(f"Error processing gesture: {e}")

        # Call detection callback if detections found
        if detections and self.on_detection_callback:
            self.on_detection_callback(detections)

    def get_recent_gestures(self, count: int = 5) -> List[Dict[str, Any]]:
        """
        Get recent gesture detections.

        Args:
            count: Number of recent gestures to return

        Returns:
            List of recent gesture detections (most recent last)
        """
        return self.gesture_history[-count:] if self.gesture_history else []

    def classify_gesture_sequence(self, count: int = 5) -> Optional[Dict[str, Any]]:
        """
        Classify a sequence of recent gestures.

        Requires an initialized classifier and at least two gestures in
        history; otherwise returns None.

        Args:
            count: Number of recent gestures to include in sequence

        Returns:
            Sequence classification result or None
        """
        if not self.classifier or len(self.gesture_history) < 2:
            return None

        recent_gestures = self.get_recent_gestures(count)
        gesture_descriptions = [g['gesture_description'] for g in recent_gestures]

        try:
            return self.classifier.classify_sequence(gesture_descriptions)
        except Exception as e:
            print(f"Error classifying gesture sequence: {e}")
            return None

    def capture_single_frame(self) -> Optional[np.ndarray]:
        """
        Capture a single frame from the camera.

        Note: do not call while the capture thread is running — both would
        read from the same VideoCapture.

        Returns:
            Captured frame (mirrored) or None if error
        """
        if not self.cap or not self.cap.isOpened():
            return None

        ret, frame = self.cap.read()
        if ret:
            return cv2.flip(frame, 1)  # Mirror effect
        return None

    def cleanup(self):
        """Clean up resources (threads, camera, detector, OpenCV windows)."""
        self.stop_capture()

        if self.hand_detector:
            self.hand_detector.cleanup()

        cv2.destroyAllWindows()
src/src/export_utils.py ADDED
@@ -0,0 +1,418 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Export utilities for sign language detection results
3
+ """
4
+
5
+ import json
6
+ import csv
7
+ import os
8
+ from datetime import datetime
9
+ from typing import List, Dict, Any, Optional
10
+ import pandas as pd
11
+ from reportlab.lib.pagesizes import letter, A4
12
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image as RLImage
13
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
14
+ from reportlab.lib.units import inch
15
+ from reportlab.lib import colors
16
+ from reportlab.lib.enums import TA_CENTER, TA_LEFT
17
+ import tempfile
18
+ import cv2
19
+ import numpy as np
20
+ from PIL import Image
21
+ import io
22
+ import base64
23
+
24
+
25
class ResultExporter:
    """
    Export sign language detection results in various formats.

    Supports JSON (full data, optionally with base64 image payloads),
    flat CSV (one row per hand detection), and a formatted PDF report.
    """

    def __init__(self):
        """Initialize the exporter and pre-build the PDF paragraph styles."""
        self.styles = getSampleStyleSheet()
        self.custom_styles = self._create_custom_styles()

    def _create_custom_styles(self) -> Dict[str, ParagraphStyle]:
        """Create custom paragraph styles for PDF reports."""
        custom_styles = {}

        # Title style
        custom_styles['CustomTitle'] = ParagraphStyle(
            'CustomTitle',
            parent=self.styles['Title'],
            fontSize=24,
            spaceAfter=30,
            alignment=TA_CENTER,
            textColor=colors.darkblue
        )

        # Heading style
        custom_styles['CustomHeading'] = ParagraphStyle(
            'CustomHeading',
            parent=self.styles['Heading1'],
            fontSize=16,
            spaceAfter=12,
            spaceBefore=20,
            textColor=colors.darkblue
        )

        # Subheading style
        custom_styles['CustomSubheading'] = ParagraphStyle(
            'CustomSubheading',
            parent=self.styles['Heading2'],
            fontSize=14,
            spaceAfter=8,
            spaceBefore=12,
            textColor=colors.darkgreen
        )

        return custom_styles

    def export_to_json(self, results: List[Dict[str, Any]],
                       output_path: str,
                       include_metadata: bool = True) -> bool:
        """
        Export results to JSON format.

        Args:
            results: List of processing results
            output_path: Output file path
            include_metadata: Whether to include metadata; set False to strip
                the large base64 image payloads from the output

        Returns:
            True if successful, False otherwise
        """
        try:
            export_data = {
                'export_timestamp': datetime.now().isoformat(),
                'total_files': len(results),
                'successful_files': sum(1 for r in results if r.get('success', False)),
                'results': []
            }

            for result in results:
                # Clean result for JSON serialization (numpy arrays -> lists/base64)
                clean_result = self._clean_result_for_export(result)

                if not include_metadata:
                    # Remove large data like images
                    clean_result.pop('annotated_image', None)
                    clean_result.pop('enhanced_image', None)
                    clean_result.pop('comparison_image', None)
                    clean_result.pop('original_image', None)

                export_data['results'].append(clean_result)

            with open(output_path, 'w', encoding='utf-8') as f:
                # default=str catches stray non-serializable values (e.g. datetimes)
                json.dump(export_data, f, indent=2, default=str, ensure_ascii=False)

            return True

        except Exception as e:
            print(f"Error exporting to JSON: {e}")
            return False

    def export_to_csv(self, results: List[Dict[str, Any]], output_path: str) -> bool:
        """
        Export results to CSV format (one row per detected hand; failed or
        empty files get a single placeholder row).

        Args:
            results: List of processing results
            output_path: Output file path

        Returns:
            True if successful, False otherwise (including when there is
            nothing at all to write)
        """
        try:
            csv_data = []

            for result in results:
                if not result.get('success'):
                    # Failed file: one row recording the error
                    csv_data.append({
                        'filename': result.get('filename', ''),
                        'file_type': result.get('file_type', ''),
                        'success': False,
                        'error': result.get('error', ''),
                        'hands_detected': 0,
                        'hand_label': '',
                        'confidence': 0,
                        'letter': '',
                        'word': '',
                        'ai_confidence': 0
                    })
                    continue

                if result.get('detections'):
                    for detection in result['detections']:
                        row = {
                            'filename': result.get('filename', ''),
                            'file_type': result.get('file_type', ''),
                            'success': True,
                            'error': '',
                            'hands_detected': result.get('hands_detected', 0),
                            'hand_label': detection.get('hand_label', ''),
                            'confidence': detection.get('confidence', 0),
                            'gesture_description': detection.get('gesture_description', '')
                        }

                        # Add classification data if available
                        if 'classification' in detection and detection['classification'].get('success'):
                            classification = detection['classification']
                            row.update({
                                'letter': classification.get('letter', ''),
                                'word': classification.get('word', ''),
                                'ai_confidence': classification.get('confidence', 0)
                            })
                        else:
                            row.update({
                                'letter': '',
                                'word': '',
                                'ai_confidence': 0
                            })

                        csv_data.append(row)
                else:
                    # Successful file but no detections
                    csv_data.append({
                        'filename': result.get('filename', ''),
                        'file_type': result.get('file_type', ''),
                        'success': True,
                        'error': '',
                        'hands_detected': 0,
                        'hand_label': '',
                        'confidence': 0,
                        'letter': '',
                        'word': '',
                        'ai_confidence': 0
                    })

            # Write to CSV
            if csv_data:
                df = pd.DataFrame(csv_data)
                df.to_csv(output_path, index=False)
                return True

            return False

        except Exception as e:
            print(f"Error exporting to CSV: {e}")
            return False

    def export_to_pdf(self, results: List[Dict[str, Any]],
                      output_path: str,
                      include_images: bool = True) -> bool:
        """
        Export results to PDF report.

        Args:
            results: List of processing results
            output_path: Output file path
            include_images: Whether to include images in the report
                (currently unused by the report body)

        Returns:
            True if successful, False otherwise
        """
        try:
            doc = SimpleDocTemplate(output_path, pagesize=A4)
            story = []

            # Title
            title = Paragraph("Sign Language Detection Report", self.custom_styles['CustomTitle'])
            story.append(title)
            story.append(Spacer(1, 20))

            # Summary
            successful_files = sum(1 for r in results if r.get('success', False))
            total_hands = sum(r.get('hands_detected', 0) for r in results if r.get('success', False))

            summary_text = f"""
            <b>Processing Summary</b><br/>
            Total Files: {len(results)}<br/>
            Successful: {successful_files}<br/>
            Total Hands Detected: {total_hands}<br/>
            Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
            """

            summary = Paragraph(summary_text, self.styles['Normal'])
            story.append(summary)
            story.append(Spacer(1, 20))

            # Results for each file
            for i, result in enumerate(results):
                # File header — fall back to a positional name when the
                # result has no filename. (Bug fix: the original computed
                # `filename` but printed a hard-coded placeholder instead.)
                filename = result.get('filename', f'File {i+1}')
                header = Paragraph(f"File: {filename}", self.custom_styles['CustomHeading'])
                story.append(header)

                if not result.get('success'):
                    error_text = f"<font color='red'>Error: {result.get('error', 'Unknown error')}</font>"
                    error_para = Paragraph(error_text, self.styles['Normal'])
                    story.append(error_para)
                    story.append(Spacer(1, 10))
                    continue

                # File info table
                file_info = [
                    ['Property', 'Value'],
                    ['File Type', result.get('file_type', 'Unknown')],
                    ['File Size', f"{result.get('file_size', 0) / 1024:.1f} KB"],
                    ['Hands Detected', str(result.get('hands_detected', 0))]
                ]

                if result.get('file_type') == 'video':
                    video_props = result.get('video_properties', {})
                    file_info.extend([
                        ['Duration', f"{video_props.get('duration', 0):.1f}s"],
                        ['FPS', f"{video_props.get('fps', 0):.1f}"],
                        ['Total Frames', str(video_props.get('total_frames', 0))]
                    ])

                info_table = Table(file_info)
                info_table.setStyle(TableStyle([
                    ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
                    ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
                    ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
                    ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                    ('FONTSIZE', (0, 0), (-1, 0), 12),
                    ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
                    ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
                    ('GRID', (0, 0), (-1, -1), 1, colors.black)
                ]))

                story.append(info_table)
                story.append(Spacer(1, 15))

                # Detection details
                if result.get('detections'):
                    detections_header = Paragraph("Detection Details", self.custom_styles['CustomSubheading'])
                    story.append(detections_header)

                    for j, detection in enumerate(result['detections']):
                        detection_text = f"""
                        <b>Hand {j+1}: {detection.get('hand_label', 'Unknown')}</b><br/>
                        Confidence: {detection.get('confidence', 0):.1%}<br/>
                        """

                        if 'classification' in detection and detection['classification'].get('success'):
                            classification = detection['classification']
                            if classification.get('letter'):
                                detection_text += f"Letter: <b>{classification['letter']}</b><br/>"
                            if classification.get('word'):
                                detection_text += f"Word: <b>{classification['word']}</b><br/>"
                            if classification.get('confidence'):
                                detection_text += f"AI Confidence: {classification['confidence']:.1%}<br/>"

                        detection_para = Paragraph(detection_text, self.styles['Normal'])
                        story.append(detection_para)
                        story.append(Spacer(1, 10))

                story.append(Spacer(1, 20))

            # Build PDF
            doc.build(story)
            return True

        except Exception as e:
            print(f"Error exporting to PDF: {e}")
            return False

    def _clean_result_for_export(self, result: Dict[str, Any]) -> Dict[str, Any]:
        """
        Clean a result dictionary for export by converting numpy arrays to
        JSON-serializable values.

        Known image keys are encoded as base64 PNG data URIs; other arrays
        become nested lists; everything else passes through unchanged.

        Args:
            result: Result dictionary

        Returns:
            Cleaned result dictionary
        """
        clean_result = {}

        for key, value in result.items():
            if isinstance(value, np.ndarray):
                if key in ['annotated_image', 'enhanced_image', 'comparison_image', 'original_image']:
                    try:
                        # Convert to PIL Image and then to base64
                        if len(value.shape) == 3:
                            # Convert BGR (OpenCV convention) to RGB for proper colors
                            value_rgb = cv2.cvtColor(value, cv2.COLOR_BGR2RGB)
                            pil_image = Image.fromarray(value_rgb)
                        else:
                            pil_image = Image.fromarray(value)

                        buffer = io.BytesIO()
                        pil_image.save(buffer, format='PNG')
                        img_str = base64.b64encode(buffer.getvalue()).decode()
                        clean_result[key] = f"data:image/png;base64,{img_str}"
                    except Exception:
                        # Bug fix: was a bare `except:` which also swallowed
                        # KeyboardInterrupt/SystemExit. Encoding failure drops
                        # the image rather than aborting the whole export.
                        clean_result[key] = None
                else:
                    clean_result[key] = value.tolist()
            else:
                # Lists, dicts and scalars are already JSON-friendly; the
                # original's separate isinstance(list/dict) branch was
                # redundant with this one.
                clean_result[key] = value

        return clean_result

    def create_summary_report(self, results: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Create a summary report of the processing results.

        Args:
            results: List of processing results

        Returns:
            Summary report dictionary with counts, per-letter/word tallies,
            average detection confidence, and the list of processing errors.
        """
        summary = {
            'total_files': len(results),
            'successful_files': 0,
            'failed_files': 0,
            'total_hands_detected': 0,
            'file_types': {},
            'detected_letters': {},
            'detected_words': {},
            'average_confidence': 0,
            'processing_errors': []
        }

        confidences = []

        for result in results:
            if result.get('success'):
                summary['successful_files'] += 1
                summary['total_hands_detected'] += result.get('hands_detected', 0)

                # File type statistics
                file_type = result.get('file_type', 'unknown')
                summary['file_types'][file_type] = summary['file_types'].get(file_type, 0) + 1

                # Process detections
                for detection in result.get('detections', []):
                    if 'confidence' in detection:
                        confidences.append(detection['confidence'])

                    if 'classification' in detection and detection['classification'].get('success'):
                        classification = detection['classification']

                        if classification.get('letter'):
                            letter = classification['letter']
                            summary['detected_letters'][letter] = summary['detected_letters'].get(letter, 0) + 1

                        if classification.get('word'):
                            word = classification['word']
                            summary['detected_words'][word] = summary['detected_words'].get(word, 0) + 1
            else:
                summary['failed_files'] += 1
                summary['processing_errors'].append({
                    'filename': result.get('filename', 'unknown'),
                    'error': result.get('error', 'unknown error')
                })

        # Calculate average confidence (0 when there were no detections)
        if confidences:
            summary['average_confidence'] = sum(confidences) / len(confidences)

        return summary
src/src/fallback_classifier.py ADDED
@@ -0,0 +1,303 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Fallback Sign Language Classifier
3
+
4
+ This module provides basic sign language classification without requiring OpenAI API.
5
+ It uses rule-based pattern matching to identify common ASL letters and gestures.
6
+ """
7
+
8
+ from typing import Dict, Any, Optional
9
+ import re
10
+
11
+
12
class FallbackSignLanguageClassifier:
    """
    Fallback classifier for basic ASL recognition using pattern matching.

    Provides rule-based classification of textual gesture descriptions so
    the application keeps working without any external API.
    """

    # Finger names recognized in gesture descriptions, in canonical order.
    _FINGER_NAMES = ('thumb', 'index', 'middle', 'ring', 'pinky')

    def __init__(self):
        """Initialize the fallback classifier."""
        self.debug = True
        print("Fallback classifier initialized (no API required)")

    def classify_gesture(self, gesture_description: str,
                         sign_language: str = "ASL",
                         context: Optional[str] = None) -> Dict[str, Any]:
        """
        Classify gesture using rule-based pattern matching.

        Args:
            gesture_description: Description of the hand gesture
            sign_language: Sign language type (default: ASL; currently only
                ASL rules are implemented)
            context: Additional context (optional, unused)

        Returns:
            Classification result dictionary with keys 'success', 'letter',
            'word', 'confidence', 'description', 'method' and the parsed
            finger/pattern data.
        """
        if self.debug:
            print(f"\n=== Fallback Classification Debug ===")
            print(f"Input: {gesture_description}")

        try:
            # Analyze the gesture description
            result = self._analyze_gesture_patterns(gesture_description)
            result['success'] = True
            result['method'] = 'fallback_pattern_matching'

            if self.debug:
                print(f"Result: {result}")
                print("=== End Fallback Debug ===\n")

            return result

        except Exception as e:
            if self.debug:
                print(f"Fallback classification error: {str(e)}")
                print("=== End Fallback Debug ===\n")

            return {
                'success': False,
                'error': str(e),
                'letter': None,
                'word': None,
                'confidence': 0.0,
                'description': 'Fallback classification failed',
                'method': 'fallback_pattern_matching'
            }

    def _analyze_gesture_patterns(self, description: str) -> Dict[str, Any]:
        """
        Analyze gesture description using pattern matching rules.

        Args:
            description: Gesture description string

        Returns:
            Classification result
        """
        desc_lower = description.lower()

        # Extract key information
        extended_fingers = self._extract_extended_fingers(desc_lower)
        closed_fingers = self._extract_closed_fingers(desc_lower)
        patterns = self._extract_patterns(desc_lower)

        # Rule-based classification
        letter, word, confidence, explanation = self._apply_classification_rules(
            extended_fingers, closed_fingers, patterns, desc_lower
        )

        return {
            'letter': letter,
            'word': word,
            'confidence': confidence,
            'description': explanation,
            'extended_fingers': extended_fingers,
            'closed_fingers': closed_fingers,
            'patterns': patterns
        }

    def _extract_finger_section(self, description: str, label: str) -> list:
        """
        Parse the finger list that follows *label* in the description.

        The section runs from just after the label to the next ';' (or end
        of string). Fingers are returned in canonical order regardless of
        their order in the text. Shared helper for the two public
        extractors, which were previously copy-paste duplicates.
        """
        if label not in description:
            return []

        start = description.find(label) + len(label)
        end = description.find(';', start)
        if end == -1:
            end = len(description)

        fingers_text = description[start:end].strip()
        return [name for name in self._FINGER_NAMES if name in fingers_text]

    def _extract_extended_fingers(self, description: str) -> list:
        """Extract extended fingers from description."""
        return self._extract_finger_section(description, 'extended fingers:')

    def _extract_closed_fingers(self, description: str) -> list:
        """Extract closed fingers from description."""
        return self._extract_finger_section(description, 'closed fingers:')

    def _extract_patterns(self, description: str) -> list:
        """Extract gesture patterns (fist/open/pointing/pinch) from description."""
        patterns = []

        if 'closed fist' in description:
            patterns.append('closed_fist')
        if 'open hand' in description:
            patterns.append('open_hand')
        if 'pointing gesture' in description:
            patterns.append('pointing')
        if 'pinch gesture' in description:
            patterns.append('pinch')

        return patterns

    def _apply_classification_rules(self, extended: list, closed: list,
                                    patterns: list, description: str) -> tuple:
        """
        Apply enhanced ASL-specific classification logic.

        Rules are ordered by specificity; the first match wins.

        Returns:
            (letter, word, confidence, explanation)
        """

        # PRECISE ASL RULES based on exact finger positions

        # Rule 1: Single finger extended
        if len(extended) == 1:
            if 'index' in extended:
                return '1', None, 0.9, "Index finger only = Number 1"
            elif 'pinky' in extended:
                return None, 'I', 0.9, "Pinky finger only = Pronoun I"
            elif 'thumb' in extended:
                return None, 'GOOD', 0.8, "Thumb up = GOOD"
            elif 'middle' in extended:
                return None, 'BAD', 0.6, "Middle finger = BAD (rude gesture)"

        # Rule 2: Two fingers extended
        if len(extended) == 2:
            if 'index' in extended and 'middle' in extended:
                return '2', None, 0.9, "Index and middle = Number 2"
            elif 'index' in extended and 'thumb' in extended:
                return 'L', None, 0.8, "Index and thumb = Letter L"
            elif 'index' in extended and 'pinky' in extended:
                return None, 'I LOVE YOU', 0.9, "Index and pinky = I LOVE YOU sign"
            elif 'thumb' in extended and 'pinky' in extended:
                return None, 'CALL', 0.7, "Thumb and pinky = CALL/PHONE"

        # Rule 3: Three fingers extended
        if len(extended) == 3:
            if 'index' in extended and 'middle' in extended and 'ring' in extended:
                return '3', None, 0.9, "Three middle fingers = Number 3"
            elif 'thumb' in extended and 'index' in extended and 'pinky' in extended:
                return None, 'I LOVE YOU', 0.9, "Thumb, index, pinky = I LOVE YOU"

        # Rule 4: Four fingers extended (thumb closed)
        if len(extended) == 4 and 'thumb' in closed:
            return '4', None, 0.9, "Four fingers, thumb closed = Number 4"

        # Rule 5: All five fingers extended
        if len(extended) == 5:
            return '5', None, 0.9, "All fingers extended = Number 5"

        # Rule 6: Closed fist (no fingers extended)
        if len(extended) == 0 or 'closed_fist' in patterns:
            return 'A', None, 0.8, "Closed fist = Letter A"

        # Rule 7: Four fingers extended (index, middle, ring, pinky) - thumb closed
        # NOTE(review): unreachable — Rule 4 already matches every case where
        # len(extended) == 4 and thumb is closed, so HELLO can never be
        # returned here. Kept (not fixed) to preserve existing behavior;
        # decide which rule should win before reordering.
        if (len(extended) == 4 and 'index' in extended and 'middle' in extended
                and 'ring' in extended and 'pinky' in extended and 'thumb' in closed):
            return None, 'HELLO', 0.8, "Four fingers extended = HELLO"

        # Rule 8: Pinch gesture pattern
        if 'pinch' in patterns:
            return 'F', None, 0.7, "Pinch gesture = Letter F"

        # Rule 9: Pointing gesture pattern
        if 'pointing' in patterns:
            if 'index' in extended and len(extended) == 1:
                return '1', None, 0.8, "Pointing with index = Number 1"
            else:
                return None, 'YOU', 0.6, "Pointing gesture = YOU"

        # Rule 10: Open hand pattern
        if 'open_hand' in patterns:
            if len(extended) == 5:
                return '5', None, 0.8, "Open hand = Number 5"
            else:
                return None, 'HELLO', 0.7, "Open hand = HELLO"

        # Default fallback based on finger count with lower confidence.
        # (Idiom fix: these were f-strings with no placeholders.)
        finger_count = len(extended)
        if finger_count == 0:
            return 'A', None, 0.4, "No extended fingers, default to A"
        elif finger_count == 1:
            return '1', None, 0.4, "One finger extended, default to 1"
        elif finger_count == 2:
            return '2', None, 0.4, "Two fingers extended, default to 2"
        elif finger_count == 3:
            return '3', None, 0.4, "Three fingers extended, default to 3"
        elif finger_count == 4:
            return '4', None, 0.4, "Four fingers extended, default to 4"
        elif finger_count == 5:
            return '5', None, 0.4, "Five fingers extended, default to 5"
        else:
            return None, None, 0.1, "Unable to classify gesture"

    def classify_sequence(self, gesture_descriptions: list,
                          sign_language: str = "ASL") -> Dict[str, Any]:
        """
        Classify a sequence of gestures (fallback implementation).

        Classifies each gesture independently, then tries to combine
        single-letter results into a known word.

        Args:
            gesture_descriptions: List of gesture descriptions
            sign_language: Sign language type

        Returns:
            Sequence classification result
        """
        # Simple implementation: classify each gesture and combine
        letters = []
        words = []

        for desc in gesture_descriptions:
            result = self.classify_gesture(desc, sign_language)
            if result.get('success'):
                if result.get('letter'):
                    letters.append(result['letter'])
                if result.get('word'):
                    words.append(result['word'])

        # Try to form words from letters
        if letters and not words:
            letter_sequence = ''.join(letters)
            # Check for common words
            common_words = {
                'HI': 'HI',
                'NO': 'NO',
                'OK': 'OK',
                'YES': 'YES'
            }

            if letter_sequence in common_words:
                words.append(common_words[letter_sequence])

        return {
            'success': True,
            'word': words[0] if words else None,
            'sentence': ' '.join(words) if len(words) > 1 else None,
            'confidence': 0.6,
            'individual_letters': letters,
            'method': 'fallback_sequence_matching'
        }
@@ -0,0 +1,543 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ File Input Handler for Sign Language Detection
3
+ Processes video and image files for gesture analysis
4
+ """
5
+
6
+ import cv2
7
+ import numpy as np
8
+ import os
9
+ from typing import List, Dict, Any, Optional, Tuple, Generator
10
+ from PIL import Image
11
+ import time
12
+
13
+ from .hand_detector import HandDetector
14
+ from .gesture_extractor import GestureExtractor
15
+ from .openai_classifier import SignLanguageClassifier
16
+ from .gemini_classifier import GeminiSignLanguageClassifier
17
+ from .prediction_logger import PredictionLogger
18
+ from .visualization_utils import HandLandmarkVisualizer, create_comparison_view
19
+
20
+
21
+ class FileHandler:
22
+ """
23
+ Handles file input (images and videos) for sign language detection.
24
+ """
25
+
26
    def __init__(self,
                 frame_skip: int = 5,
                 max_frames: int = 100):
        """
        Initialize the FileHandler.

        Args:
            frame_skip: Number of frames to skip between processing (for videos)
            max_frames: Maximum number of frames to process from a video
        """
        # Video sampling configuration: every (frame_skip + 1)-th frame is
        # processed, up to max_frames frames per video.
        self.frame_skip = frame_skip
        self.max_frames = max_frames

        # Initialize components; static_image_mode because inputs are
        # independent files rather than a live stream.
        self.hand_detector = HandDetector(static_image_mode=True)
        self.gesture_extractor = GestureExtractor()
        self.classifier = None  # set later via initialize_classifier()
        self.visualizer = HandLandmarkVisualizer()
        self.logger = PredictionLogger(debug=True)

        # Supported file formats (lower-case extensions, with leading dot)
        self.supported_image_formats = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp'}
        self.supported_video_formats = {'.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv'}
49
+
50
    def initialize_classifier(self, api_key: Optional[str] = None, use_gemini: bool = True) -> bool:
        """
        Initialize the AI classifier (Gemini or OpenAI).

        Args:
            api_key: API key (Gemini or OpenAI)
            use_gemini: Whether to use Gemini instead of OpenAI (default: True)

        Returns:
            True if classifier initialized successfully, False otherwise
        """
        # Check environment variable for preference; USE_GEMINI=false in the
        # environment overrides a use_gemini=True argument.
        use_gemini_env = os.getenv('USE_GEMINI', 'True').lower() == 'true'
        use_gemini = use_gemini and use_gemini_env

        if use_gemini:
            try:
                self.classifier = GeminiSignLanguageClassifier(api_key=api_key)
                print("✅ Gemini AI classifier initialized for file processing")
                return True
            except Exception as e:
                print(f"⚠️ Failed to initialize Gemini classifier: {e}")
                print("🔄 Falling back to OpenAI classifier...")

            # Fallback to OpenAI (reached only when the Gemini attempt raised)
            try:
                self.classifier = SignLanguageClassifier(api_key=api_key)
                print("✅ OpenAI classifier initialized as fallback")
                return True
            except Exception as e2:
                print(f"❌ OpenAI classifier also failed: {e2}")
                print("🔧 Will use pattern-based fallback only")
                return False
        else:
            try:
                self.classifier = SignLanguageClassifier(api_key=api_key)
                print("✅ OpenAI classifier initialized for file processing")
                return True
            except Exception as e:
                print(f"❌ Failed to initialize OpenAI classifier: {e}")
                print("🔧 Will use pattern-based fallback only")
                return False
92
+
93
+ def is_supported_file(self, file_path: str) -> bool:
94
+ """
95
+ Check if the file format is supported.
96
+
97
+ Args:
98
+ file_path: Path to the file
99
+
100
+ Returns:
101
+ True if file format is supported, False otherwise
102
+ """
103
+ if not os.path.exists(file_path):
104
+ return False
105
+
106
+ file_ext = os.path.splitext(file_path)[1].lower()
107
+ return file_ext in self.supported_image_formats or file_ext in self.supported_video_formats
108
+
109
+ def get_file_type(self, file_path: str) -> str:
110
+ """
111
+ Determine if file is image or video.
112
+
113
+ Args:
114
+ file_path: Path to the file
115
+
116
+ Returns:
117
+ 'image', 'video', or 'unknown'
118
+ """
119
+ file_ext = os.path.splitext(file_path)[1].lower()
120
+
121
+ if file_ext in self.supported_image_formats:
122
+ return 'image'
123
+ elif file_ext in self.supported_video_formats:
124
+ return 'video'
125
+ else:
126
+ return 'unknown'
127
+
128
    def process_image(self, image_path: str) -> Dict[str, Any]:
        """
        Process a single image file for gesture detection.

        Runs hand detection, builds annotated/enhanced/comparison renderings,
        and — when a classifier has been initialized — classifies each
        detected hand. Errors are returned, never raised.

        Args:
            image_path: Path to the image file

        Returns:
            Dictionary containing processing results; on success it also
            carries the original and rendered images.
        """
        if not os.path.exists(image_path):
            return {'success': False, 'error': 'File not found'}

        try:
            # Load image (BGR array as returned by OpenCV)
            image = cv2.imread(image_path)
            if image is None:
                return {'success': False, 'error': 'Could not load image'}

            # Detect hands
            annotated_image, hand_landmarks = self.hand_detector.detect_hands(image)

            print(f"\n=== Hand Detection Debug ===")
            print(f"Processing image: {os.path.basename(image_path)}")
            print(f"Image shape: {image.shape}")
            print(f"Hands detected: {len(hand_landmarks) if hand_landmarks else 0}")
            if hand_landmarks:
                for i, hand in enumerate(hand_landmarks):
                    print(f"Hand {i+1}: {hand['label']}, confidence: {hand['confidence']:.3f}")
            print("=== End Hand Detection Debug ===\n")

            # Create enhanced visualization (falls back to the plain
            # annotated image when no hands were found)
            enhanced_image = self.visualizer.draw_enhanced_landmarks(image, hand_landmarks) if hand_landmarks else annotated_image

            # Create comparison view
            comparison_image = create_comparison_view(image, enhanced_image)

            # Process gestures
            detections = []
            if hand_landmarks:
                for hand_data in hand_landmarks:
                    gesture_description = self.gesture_extractor.create_gesture_description(hand_data)

                    detection = {
                        'hand_label': hand_data['label'],
                        'gesture_description': gesture_description,
                        'confidence': hand_data['confidence'],
                        'bounding_box': self.hand_detector.get_bounding_box(
                            hand_data, image.shape[1], image.shape[0]
                        ),
                        'landmarks_3d': hand_data['landmarks']  # Store for 3D visualization
                    }

                    # Classify gesture if classifier available
                    if self.classifier:
                        print(f"\n=== File Handler Debug ===")
                        print(f"Processing hand: {hand_data['label']}")
                        print(f"Gesture description: {gesture_description}")

                        classification = self.classifier.classify_gesture(gesture_description)
                        detection['classification'] = classification

                        print(f"Classification result: {classification}")
                        print("=== End File Handler Debug ===\n")

                    detections.append(detection)

            return {
                'success': True,
                'file_path': image_path,
                'file_type': 'image',
                'image_shape': image.shape,
                'hands_detected': len(hand_landmarks) if hand_landmarks else 0,
                'detections': detections,
                'annotated_image': annotated_image,
                'enhanced_image': enhanced_image,
                'comparison_image': comparison_image,
                'original_image': image
            }

        except Exception as e:
            return {'success': False, 'error': str(e)}
210
+
211
    def process_video(self, video_path: str,
                      progress_callback: Optional[callable] = None) -> Dict[str, Any]:
        """
        Process a video file for gesture detection.

        Frames are sampled every ``frame_skip + 1`` frames, up to
        ``max_frames`` processed frames. Only frames containing hands are
        kept; when a classifier is available the kept frames are also
        analyzed as a gesture sequence.

        Args:
            video_path: Path to the video file
            progress_callback: Optional callback for progress updates
                (called with a float in [0, 1])

        Returns:
            Dictionary containing processing results
        """
        if not os.path.exists(video_path):
            return {'success': False, 'error': 'File not found'}

        try:
            # Open video
            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                return {'success': False, 'error': 'Could not open video file'}

            # Get video properties
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            fps = cap.get(cv2.CAP_PROP_FPS)
            duration = total_frames / fps if fps > 0 else 0

            # Process frames
            frame_detections = []
            frame_count = 0
            processed_frames = 0

            while cap.isOpened() and processed_frames < self.max_frames:
                ret, frame = cap.read()
                if not ret:
                    break

                # Skip frames based on frame_skip setting
                if frame_count % (self.frame_skip + 1) != 0:
                    frame_count += 1
                    continue

                # Process frame; timestamp falls back to the raw frame index
                # when the container reports no FPS.
                timestamp = frame_count / fps if fps > 0 else frame_count
                frame_result = self._process_video_frame(frame, timestamp, frame_count)

                if frame_result['hands_detected'] > 0:
                    frame_detections.append(frame_result)

                processed_frames += 1
                frame_count += 1

                # Progress callback
                if progress_callback:
                    progress = min(processed_frames / self.max_frames, frame_count / total_frames)
                    progress_callback(progress)

            cap.release()

            # Analyze sequence if detections found
            sequence_analysis = None
            if frame_detections and self.classifier:
                sequence_analysis = self._analyze_video_sequence(frame_detections)

            return {
                'success': True,
                'file_path': video_path,
                'file_type': 'video',
                'video_properties': {
                    'total_frames': total_frames,
                    'fps': fps,
                    'duration': duration,
                    'processed_frames': processed_frames
                },
                'frame_detections': frame_detections,
                'sequence_analysis': sequence_analysis,
                'total_hands_detected': sum(f['hands_detected'] for f in frame_detections)
            }

        except Exception as e:
            return {'success': False, 'error': str(e)}
291
+
292
+ def _process_video_frame(self, frame: np.ndarray,
293
+ timestamp: float, frame_number: int) -> Dict[str, Any]:
294
+ """
295
+ Process a single video frame.
296
+
297
+ Args:
298
+ frame: Video frame as numpy array
299
+ timestamp: Timestamp in seconds
300
+ frame_number: Frame number
301
+
302
+ Returns:
303
+ Dictionary containing frame processing results
304
+ """
305
+ # Detect hands
306
+ annotated_frame, hand_landmarks = self.hand_detector.detect_hands(frame)
307
+
308
+ # Process gestures
309
+ detections = []
310
+ if hand_landmarks:
311
+ for hand_data in hand_landmarks:
312
+ gesture_description = self.gesture_extractor.create_gesture_description(hand_data)
313
+
314
+ detection = {
315
+ 'hand_label': hand_data['label'],
316
+ 'gesture_description': gesture_description,
317
+ 'confidence': hand_data['confidence']
318
+ }
319
+
320
+ # Classify gesture if classifier available
321
+ if self.classifier:
322
+ classification = self.classifier.classify_gesture(gesture_description)
323
+ detection['classification'] = classification
324
+
325
+ detections.append(detection)
326
+
327
+ return {
328
+ 'timestamp': timestamp,
329
+ 'frame_number': frame_number,
330
+ 'hands_detected': len(hand_landmarks) if hand_landmarks else 0,
331
+ 'detections': detections
332
+ }
333
+
334
+ def _analyze_video_sequence(self, frame_detections: List[Dict[str, Any]]) -> Dict[str, Any]:
335
+ """
336
+ Analyze sequence of video frame detections.
337
+
338
+ Args:
339
+ frame_detections: List of frame detection results
340
+
341
+ Returns:
342
+ Dictionary containing sequence analysis
343
+ """
344
+ if not self.classifier:
345
+ return {'error': 'Classifier not initialized'}
346
+
347
+ try:
348
+ # Extract gesture descriptions from frames with detections
349
+ gesture_descriptions = []
350
+ for frame_data in frame_detections:
351
+ for detection in frame_data['detections']:
352
+ if detection.get('classification', {}).get('success', False):
353
+ gesture_descriptions.append(detection['gesture_description'])
354
+
355
+ if len(gesture_descriptions) < 2:
356
+ return {'error': 'Not enough gestures for sequence analysis'}
357
+
358
+ # Classify sequence
359
+ sequence_result = self.classifier.classify_sequence(gesture_descriptions)
360
+
361
+ # Add timing information
362
+ sequence_result['start_time'] = frame_detections[0]['timestamp']
363
+ sequence_result['end_time'] = frame_detections[-1]['timestamp']
364
+ sequence_result['duration'] = sequence_result['end_time'] - sequence_result['start_time']
365
+ sequence_result['gesture_count'] = len(gesture_descriptions)
366
+
367
+ return sequence_result
368
+
369
+ except Exception as e:
370
+ return {'error': str(e)}
371
+
372
    def create_thumbnail(self, file_path: str, size: Tuple[int, int] = (150, 150)) -> Optional[np.ndarray]:
        """
        Create a thumbnail for the given file.

        For videos the first decodable frame is used. Aspect ratio is NOT
        preserved: the output is resized to exactly ``size``.

        Args:
            file_path: Path to the file
            size: Thumbnail size (width, height)

        Returns:
            Thumbnail image or None if failed
        """
        try:
            file_type = self.get_file_type(file_path)

            if file_type == 'image':
                image = cv2.imread(file_path)
                if image is not None:
                    thumbnail = cv2.resize(image, size)
                    return thumbnail

            elif file_type == 'video':
                cap = cv2.VideoCapture(file_path)
                if cap.isOpened():
                    ret, frame = cap.read()
                    if ret:
                        thumbnail = cv2.resize(frame, size)
                        cap.release()
                        return thumbnail
                    # NOTE(review): reached only when no frame could be read;
                    # a capture that failed to open is never released —
                    # harmless for OpenCV, but worth confirming.
                    cap.release()

        except Exception as e:
            print(f"Error creating thumbnail for {file_path}: {e}")

        return None
406
+
407
    def get_file_metadata(self, file_path: str) -> Dict[str, Any]:
        """
        Get metadata for a file.

        The basic path/size/type fields are always present; width/height
        (and fps/frame_count/duration for videos) are added when the file
        can be opened. Read errors are reported under an ``'error'`` key
        rather than raised.

        Args:
            file_path: Path to the file

        Returns:
            Dictionary containing file metadata
        """
        metadata = {
            'file_path': file_path,
            'filename': os.path.basename(file_path),
            'file_size': os.path.getsize(file_path) if os.path.exists(file_path) else 0,
            'file_type': self.get_file_type(file_path),
            'supported': self.is_supported_file(file_path)
        }

        try:
            file_type = metadata['file_type']

            if file_type == 'image':
                image = cv2.imread(file_path)
                if image is not None:
                    metadata.update({
                        'width': image.shape[1],
                        'height': image.shape[0],
                        'channels': image.shape[2] if len(image.shape) > 2 else 1
                    })

            elif file_type == 'video':
                cap = cv2.VideoCapture(file_path)
                if cap.isOpened():
                    metadata.update({
                        'width': int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                        'height': int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
                        'fps': cap.get(cv2.CAP_PROP_FPS),
                        'frame_count': int(cap.get(cv2.CAP_PROP_FRAME_COUNT)),
                        'duration': int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) / cap.get(cv2.CAP_PROP_FPS) if cap.get(cv2.CAP_PROP_FPS) > 0 else 0
                    })
                    cap.release()

        except Exception as e:
            metadata['error'] = str(e)

        return metadata
453
+
454
+ def batch_process_files(self, file_paths: List[str],
455
+ progress_callback: Optional[callable] = None,
456
+ detailed_progress: Optional[callable] = None) -> List[Dict[str, Any]]:
457
+ """
458
+ Enhanced batch processing with detailed progress tracking.
459
+
460
+ Args:
461
+ file_paths: List of file paths to process
462
+ progress_callback: Optional callback for overall progress updates
463
+ detailed_progress: Optional callback for detailed progress updates
464
+
465
+ Returns:
466
+ List of processing results for each file
467
+ """
468
+ results = []
469
+ total_files = len(file_paths)
470
+
471
+ for i, file_path in enumerate(file_paths):
472
+ # Update detailed progress
473
+ if detailed_progress:
474
+ detailed_progress(f"Processing {os.path.basename(file_path)}...", i, total_files)
475
+
476
+ if not self.is_supported_file(file_path):
477
+ results.append({
478
+ 'success': False,
479
+ 'file_path': file_path,
480
+ 'filename': os.path.basename(file_path),
481
+ 'error': 'Unsupported file format'
482
+ })
483
+ continue
484
+
485
+ try:
486
+ file_type = self.get_file_type(file_path)
487
+
488
+ if file_type == 'image':
489
+ result = self.process_image(file_path)
490
+ elif file_type == 'video':
491
+ result = self.process_video(file_path, progress_callback=None) # Disable nested progress
492
+ else:
493
+ result = {
494
+ 'success': False,
495
+ 'file_path': file_path,
496
+ 'filename': os.path.basename(file_path),
497
+ 'error': 'Unknown file type'
498
+ }
499
+
500
+ # Add metadata
501
+ if result.get('success'):
502
+ metadata = self.get_file_metadata(file_path)
503
+ result.update(metadata)
504
+
505
+ results.append(result)
506
+
507
+ except Exception as e:
508
+ results.append({
509
+ 'success': False,
510
+ 'file_path': file_path,
511
+ 'filename': os.path.basename(file_path),
512
+ 'error': str(e)
513
+ })
514
+
515
+ # Update overall progress
516
+ if progress_callback:
517
+ progress_callback((i + 1) / total_files)
518
+
519
+ return results
520
+
521
+ def save_annotated_image(self, annotated_image: np.ndarray,
522
+ output_path: str) -> bool:
523
+ """
524
+ Save annotated image to file.
525
+
526
+ Args:
527
+ annotated_image: Annotated image array
528
+ output_path: Path to save the image
529
+
530
+ Returns:
531
+ True if saved successfully, False otherwise
532
+ """
533
+ try:
534
+ cv2.imwrite(output_path, annotated_image)
535
+ return True
536
+ except Exception as e:
537
+ print(f"Error saving annotated image: {e}")
538
+ return False
539
+
540
+ def cleanup(self):
541
+ """Clean up resources."""
542
+ if self.hand_detector:
543
+ self.hand_detector.cleanup()
src/src/gemini_classifier.py ADDED
@@ -0,0 +1,420 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Google Gemini Sign Language Classifier
3
+
4
+ This module provides sign language classification using Google's Gemini AI API.
5
+ """
6
+
7
+ import google.generativeai as genai
8
+ import os
9
+ from typing import List, Dict, Any, Optional
10
+ import json
11
+ import time
12
+ from dotenv import load_dotenv
13
+ from .fallback_classifier import FallbackSignLanguageClassifier
14
+
15
+ # Load environment variables
16
+ load_dotenv()
17
+
18
+
19
+ class GeminiSignLanguageClassifier:
20
+ """
21
+ Sign language classifier using Google Gemini AI.
22
+ """
23
+
24
    def __init__(self, api_key: Optional[str] = None, model: str = "gemini-1.5-flash"):
        """
        Initialize the Gemini classifier.

        Args:
            api_key: Gemini API key (if None, will use environment variable)
            model: Gemini model to use for classification

        Raises:
            ValueError: If no API key was passed and GEMINI_API_KEY is unset.
        """
        self.api_key = api_key or os.getenv('GEMINI_API_KEY')
        self.model_name = model

        if not self.api_key:
            raise ValueError("Gemini API key not provided. Set GEMINI_API_KEY environment variable or pass api_key parameter.")

        # Configure Gemini
        genai.configure(api_key=self.api_key)
        self.model = genai.GenerativeModel(self.model_name)

        # Enhanced rate limiting for free tier (see _rate_limit)
        self.last_request_time = 0  # wall-clock time of the last request
        self.min_request_interval = 5.0  # 5 seconds between requests for free tier
        self.request_count = 0  # requests made in the current window
        self.request_window_start = time.time()
        self.max_requests_per_minute = 10  # Conservative limit for free tier

        # Initialize fallback classifier used when the API call fails
        self.fallback_classifier = FallbackSignLanguageClassifier()

        # Debug mode (verbose prints around every classification)
        self.debug = True

        print(f"Gemini classifier initialized with fallback support")
56
+
57
    def classify_gesture(self, gesture_description: str,
                         sign_language: str = "ASL",
                         context: Optional[str] = None) -> Dict[str, Any]:
        """
        Classify a single gesture using Gemini AI.

        On API failure the pattern-based fallback classifier is used; its
        result carries ``fallback_used`` and ``gemini_error`` keys. Only when
        both paths fail is a ``success: False`` dictionary returned.

        Args:
            gesture_description: Description of the hand gesture
            sign_language: Sign language type (default: ASL)
            context: Additional context (optional)

        Returns:
            Classification result dictionary
        """
        self._rate_limit()

        # Create the prompt
        prompt = self._create_classification_prompt(gesture_description, sign_language, context)

        if self.debug:
            print(f"\n=== Gemini Classification Debug ===")
            print(f"Input gesture description: {gesture_description}")
            print(f"Prompt sent to Gemini: {prompt[:200]}...")

        try:
            response = self.model.generate_content(prompt)
            response_content = response.text

            if self.debug:
                print(f"Gemini response: {response_content}")

            result = self._parse_response(response_content)
            result['raw_response'] = response_content
            result['success'] = True
            result['method'] = 'gemini_ai'

            if self.debug:
                print(f"Parsed result: {result}")
                print("=== End Gemini Debug ===\n")

            return result

        except Exception as e:
            error_msg = str(e)
            if self.debug:
                print(f"Gemini API Error: {error_msg}")
                print("Falling back to pattern-based classification...")

            # Use fallback classifier when Gemini API fails
            try:
                fallback_result = self.fallback_classifier.classify_gesture(
                    gesture_description, sign_language, context
                )
                fallback_result['fallback_used'] = True
                fallback_result['gemini_error'] = error_msg

                if self.debug:
                    print(f"Fallback result: {fallback_result}")
                    print("=== End Gemini Debug ===\n")

                return fallback_result

            except Exception as fallback_error:
                if self.debug:
                    print(f"Fallback also failed: {str(fallback_error)}")
                    print("=== End Gemini Debug ===\n")

                return {
                    'success': False,
                    'error': error_msg,
                    'fallback_error': str(fallback_error),
                    'letter': None,
                    'word': None,
                    'confidence': 0.0,
                    'description': None,
                    'method': 'gemini_ai'
                }
134
+
135
    def classify_sequence(self, gesture_descriptions: List[str],
                          sign_language: str = "ASL") -> Dict[str, Any]:
        """
        Classify a sequence of gestures using Gemini AI.

        Falls back to the pattern-based classifier on API failure, mirroring
        classify_gesture's error-handling chain.

        Args:
            gesture_descriptions: List of gesture descriptions
            sign_language: Sign language type

        Returns:
            Sequence classification result
        """
        self._rate_limit()

        # Create sequence prompt
        prompt = self._create_sequence_prompt(gesture_descriptions, sign_language)

        try:
            response = self.model.generate_content(prompt)
            response_content = response.text

            result = self._parse_sequence_response(response_content)
            result['raw_response'] = response_content
            result['success'] = True
            result['method'] = 'gemini_ai'

            return result

        except Exception as e:
            # Use fallback for sequence classification too
            try:
                fallback_result = self.fallback_classifier.classify_sequence(
                    gesture_descriptions, sign_language
                )
                fallback_result['fallback_used'] = True
                fallback_result['gemini_error'] = str(e)
                return fallback_result

            except Exception as fallback_error:
                return {
                    'success': False,
                    'error': str(e),
                    'fallback_error': str(fallback_error),
                    'word': None,
                    'sentence': None,
                    'confidence': 0.0,
                    'method': 'gemini_ai'
                }
183
+
184
+ def _rate_limit(self):
185
+ """Enhanced rate limiting for Gemini free tier."""
186
+ current_time = time.time()
187
+
188
+ # Reset request count every minute
189
+ if current_time - self.request_window_start >= 60:
190
+ self.request_count = 0
191
+ self.request_window_start = current_time
192
+
193
+ # Check if we've hit the per-minute limit
194
+ if self.request_count >= self.max_requests_per_minute:
195
+ sleep_time = 60 - (current_time - self.request_window_start) + 1
196
+ if self.debug:
197
+ print(f"⏳ Rate limit reached, sleeping for {sleep_time:.1f} seconds...")
198
+ time.sleep(sleep_time)
199
+ self.request_count = 0
200
+ self.request_window_start = time.time()
201
+
202
+ # Ensure minimum interval between requests
203
+ time_since_last_request = current_time - self.last_request_time
204
+ if time_since_last_request < self.min_request_interval:
205
+ sleep_time = self.min_request_interval - time_since_last_request
206
+ if self.debug:
207
+ print(f"⏳ Waiting {sleep_time:.1f} seconds between requests...")
208
+ time.sleep(sleep_time)
209
+
210
+ self.last_request_time = time.time()
211
+ self.request_count += 1
212
+
213
    def _create_classification_prompt(self, gesture_description: str,
                                      sign_language: str, context: Optional[str]) -> str:
        """Create enhanced prompt for single gesture classification.

        Note: the prompt text is ASL-specific; ``sign_language`` is accepted
        for interface symmetry but is not interpolated into the prompt.
        """
        prompt = f"""You are an expert ASL (American Sign Language) interpreter. Analyze this hand gesture and provide ONE CLEAR PREDICTION.

GESTURE DATA:
{gesture_description}

COMMON ASL PATTERNS TO RECOGNIZE:
• Index finger pointing = Number "1"
• Pinky finger only = Pronoun "I"
• Thumb up = "GOOD" or "YES"
• All fingers extended = Number "5" or "HELLO"
• Closed fist = Letter "A" or "S"
• Index + middle = Number "2"
• Three fingers = Number "3"
• Four fingers = Number "4"
• Index + pinky = "I LOVE YOU"
• Thumb + index = Letter "L"

TASK: Based on the finger positions described, identify what this gesture most likely represents:
- A single letter (A-Z)
- A single number (0-9)
- A complete word (HELLO, GOOD, I, YOU, LOVE, etc.)

Even if not a perfect match, provide your best interpretation based on ASL knowledge.

"""

        if context:
            prompt += f"Context: {context}\n\n"

        prompt += """Respond in this EXACT JSON format (choose ONE prediction):
{
"letter": "1",
"word": null,
"confidence": 0.85,
"description": "Index finger pointing = Number 1"
}

OR for a word:
{
"letter": null,
"word": "GOOD",
"confidence": 0.85,
"description": "Thumb up = GOOD"
}

IMPORTANT: Always provide either a letter OR a word, never both null. Make your best guess based on ASL knowledge."""

        return prompt
264
+
265
    def _create_sequence_prompt(self, gesture_descriptions: List[str],
                                sign_language: str) -> str:
        """Create prompt for gesture sequence classification.

        Lists each gesture on its own numbered line, then asks for a combined
        JSON interpretation of the whole sequence.
        """
        prompt = f"""Analyze this sequence of {sign_language} hand gestures:

"""

        for i, description in enumerate(gesture_descriptions, 1):
            prompt += f"Gesture {i}: {description}\n"

        prompt += f"""
What word or sentence do these {sign_language} gestures spell out when combined?
Consider the sequence and flow of the gestures.

Respond in JSON format:
{{
"word": "HELLO" or null,
"sentence": "HELLO WORLD" or null,
"confidence": 0.85,
"individual_letters": ["H", "E", "L", "L", "O"]
}}"""

        return prompt
288
+
289
    def _parse_response(self, response_text: str) -> Dict[str, Any]:
        """Parse Gemini response for single gesture classification.

        Tries strict JSON extraction first; if the model returned null for
        both letter and word, attempts to recover a prediction from the
        free-text description, and finally falls back to plain-text parsing.
        """
        try:
            # Try to parse as JSON first (take the outermost {...} span)
            if '{' in response_text and '}' in response_text:
                json_start = response_text.find('{')
                json_end = response_text.rfind('}') + 1
                json_str = response_text[json_start:json_end]
                result = json.loads(json_str)

                # Extract values
                letter = result.get('letter')
                word = result.get('word')
                confidence = float(result.get('confidence', 0.0))
                description = result.get('description', '')

                # If both are null, try to extract from description
                if not letter and not word:
                    if self.debug:
                        print("⚠️ Gemini returned null values, trying to extract from description...")

                    # Try to extract prediction from description
                    desc_lower = description.lower()

                    # Look for numbers
                    for num in ['1', '2', '3', '4', '5', '6', '7', '8', '9', '0']:
                        if f"number '{num}'" in desc_lower or f"number {num}" in desc_lower:
                            letter = num
                            break

                    # Look for letters
                    if not letter:
                        for char in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ':
                            if f"letter '{char.lower()}'" in desc_lower or f"letter {char.lower()}" in desc_lower:
                                letter = char
                                break

                    # Look for words (substring match against a small list)
                    if not letter and not word:
                        common_words = ['good', 'hello', 'i', 'you', 'love', 'yes', 'no', 'please', 'thank you']
                        for w in common_words:
                            if w in desc_lower:
                                word = w.upper()
                                break

                return {
                    'letter': letter,
                    'word': word,
                    'confidence': confidence,
                    'description': description
                }
            else:
                # Fallback: simple text parsing
                return self._parse_text_response(response_text)

        except (json.JSONDecodeError, ValueError):
            return self._parse_text_response(response_text)
346
+
347
+ def _parse_sequence_response(self, response_text: str) -> Dict[str, Any]:
348
+ """Parse Gemini response for sequence classification."""
349
+ try:
350
+ if '{' in response_text and '}' in response_text:
351
+ json_start = response_text.find('{')
352
+ json_end = response_text.rfind('}') + 1
353
+ json_str = response_text[json_start:json_end]
354
+ result = json.loads(json_str)
355
+
356
+ return {
357
+ 'word': result.get('word'),
358
+ 'sentence': result.get('sentence'),
359
+ 'confidence': float(result.get('confidence', 0.0)),
360
+ 'individual_letters': result.get('individual_letters', [])
361
+ }
362
+ else:
363
+ return self._parse_sequence_text_response(response_text)
364
+
365
+ except (json.JSONDecodeError, ValueError):
366
+ return self._parse_sequence_text_response(response_text)
367
+
368
+ def _parse_text_response(self, response_text: str) -> Dict[str, Any]:
369
+ """Enhanced fallback text parsing for single gesture."""
370
+ response_lower = response_text.lower()
371
+
372
+ # Common ASL words to look for
373
+ common_words = ['hello', 'hungry', 'thank you', 'please', 'sorry', 'yes', 'no',
374
+ 'i', 'you', 'love', 'help', 'more', 'water', 'eat', 'drink',
375
+ 'good', 'bad', 'happy', 'sad', 'stop', 'go', 'come', 'home']
376
+
377
+ # Look for words first (priority)
378
+ word = None
379
+ for w in common_words:
380
+ if w in response_lower:
381
+ word = w.upper()
382
+ break
383
+
384
+ # Look for letter patterns
385
+ letter = None
386
+ if not word: # Only look for letters if no word found
387
+ import re
388
+ # Look for single letters
389
+ letter_match = re.search(r'\b([A-Z])\b', response_text.upper())
390
+ if letter_match:
391
+ letter = letter_match.group(1)
392
+
393
+ # Look for numbers
394
+ number_match = re.search(r'\b([0-9])\b', response_text)
395
+ if number_match:
396
+ letter = number_match.group(1)
397
+
398
+ # Extract confidence if mentioned
399
+ confidence = 0.5 # Default
400
+ conf_match = re.search(r'(\d+(?:\.\d+)?)\s*%', response_text)
401
+ if conf_match:
402
+ confidence = float(conf_match.group(1)) / 100
403
+
404
+ return {
405
+ 'letter': letter,
406
+ 'word': word,
407
+ 'confidence': confidence,
408
+ 'description': f"Parsed from text: {response_text[:100]}..."
409
+ }
410
+
411
+ def _parse_sequence_text_response(self, response_text: str) -> Dict[str, Any]:
412
+ """Fallback text parsing for sequence."""
413
+ # Simple implementation for sequence parsing
414
+ return {
415
+ 'word': None,
416
+ 'sentence': None,
417
+ 'confidence': 0.3,
418
+ 'individual_letters': [],
419
+ 'description': f"Text parsing fallback: {response_text[:100]}..."
420
+ }
src/src/gesture_extractor.py ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Gesture Feature Extraction Module
3
+ Processes hand landmark data into simplified format for OpenAI API classification
4
+ """
5
+
6
+ import numpy as np
7
+ import math
8
+ from typing import List, Dict, Any, Tuple, Optional
9
+
10
+
11
+ class GestureExtractor:
12
+ """
13
+ A class for extracting gesture features from hand landmarks.
14
+ """
15
+
16
+ def __init__(self):
17
+ """Initialize the GestureExtractor."""
18
+ # Define finger tip and base indices for easier processing
19
+ self.finger_tips = [4, 8, 12, 16, 20] # Thumb, Index, Middle, Ring, Pinky tips
20
+ self.finger_bases = [2, 5, 9, 13, 17] # Finger base joints
21
+ self.finger_pips = [3, 6, 10, 14, 18] # PIP joints
22
+
23
+ def normalize_landmarks(self, hand_landmarks: Dict[str, Any]) -> List[Dict[str, float]]:
24
+ """
25
+ Normalize hand landmarks relative to wrist position and hand size.
26
+
27
+ Args:
28
+ hand_landmarks: Hand landmark data from MediaPipe
29
+
30
+ Returns:
31
+ List of normalized landmark coordinates
32
+ """
33
+ landmarks = hand_landmarks['landmarks']
34
+
35
+ # Get wrist position (landmark 0)
36
+ wrist = landmarks[0]
37
+ wrist_x, wrist_y = wrist['x'], wrist['y']
38
+
39
+ # Calculate hand size (distance from wrist to middle finger MCP)
40
+ middle_mcp = landmarks[9]
41
+ hand_size = math.sqrt(
42
+ (middle_mcp['x'] - wrist_x) ** 2 +
43
+ (middle_mcp['y'] - wrist_y) ** 2
44
+ )
45
+
46
+ # Avoid division by zero
47
+ if hand_size == 0:
48
+ hand_size = 1.0
49
+
50
+ # Normalize all landmarks
51
+ normalized_landmarks = []
52
+ for landmark in landmarks:
53
+ normalized = {
54
+ 'x': (landmark['x'] - wrist_x) / hand_size,
55
+ 'y': (landmark['y'] - wrist_y) / hand_size,
56
+ 'z': landmark['z'] / hand_size
57
+ }
58
+ normalized_landmarks.append(normalized)
59
+
60
+ return normalized_landmarks
61
+
62
+ def extract_finger_states(self, normalized_landmarks: List[Dict[str, float]]) -> Dict[str, bool]:
63
+ """
64
+ Determine which fingers are extended or closed.
65
+
66
+ Args:
67
+ normalized_landmarks: Normalized landmark coordinates
68
+
69
+ Returns:
70
+ Dictionary with finger states (True = extended, False = closed)
71
+ """
72
+ finger_names = ['thumb', 'index', 'middle', 'ring', 'pinky']
73
+ finger_states = {}
74
+
75
+ for i, finger_name in enumerate(finger_names):
76
+ tip_idx = self.finger_tips[i]
77
+ pip_idx = self.finger_pips[i]
78
+
79
+ # For thumb, use different logic (horizontal movement)
80
+ if finger_name == 'thumb':
81
+ # Compare thumb tip with thumb IP joint
82
+ tip_x = normalized_landmarks[tip_idx]['x']
83
+ ip_x = normalized_landmarks[3]['x'] # Thumb IP joint
84
+ finger_states[finger_name] = abs(tip_x - ip_x) > 0.1
85
+ else:
86
+ # For other fingers, compare tip Y with PIP Y
87
+ tip_y = normalized_landmarks[tip_idx]['y']
88
+ pip_y = normalized_landmarks[pip_idx]['y']
89
+ finger_states[finger_name] = tip_y < pip_y # Extended if tip is above PIP
90
+
91
+ return finger_states
92
+
93
+ def calculate_angles(self, normalized_landmarks: List[Dict[str, float]]) -> Dict[str, float]:
94
+ """
95
+ Calculate angles between key landmarks.
96
+
97
+ Args:
98
+ normalized_landmarks: Normalized landmark coordinates
99
+
100
+ Returns:
101
+ Dictionary of calculated angles
102
+ """
103
+ angles = {}
104
+
105
+ # Calculate angle between thumb and index finger
106
+ thumb_tip = normalized_landmarks[4]
107
+ index_tip = normalized_landmarks[8]
108
+ wrist = normalized_landmarks[0]
109
+
110
+ # Vector from wrist to thumb tip
111
+ thumb_vector = np.array([thumb_tip['x'] - wrist['x'], thumb_tip['y'] - wrist['y']])
112
+ # Vector from wrist to index tip
113
+ index_vector = np.array([index_tip['x'] - wrist['x'], index_tip['y'] - wrist['y']])
114
+
115
+ # Calculate angle between vectors
116
+ dot_product = np.dot(thumb_vector, index_vector)
117
+ norms = np.linalg.norm(thumb_vector) * np.linalg.norm(index_vector)
118
+
119
+ if norms > 0:
120
+ cos_angle = dot_product / norms
121
+ cos_angle = np.clip(cos_angle, -1.0, 1.0) # Ensure valid range
122
+ angles['thumb_index_angle'] = math.degrees(math.acos(cos_angle))
123
+ else:
124
+ angles['thumb_index_angle'] = 0.0
125
+
126
+ # Calculate hand orientation (angle of palm)
127
+ middle_mcp = normalized_landmarks[9]
128
+ wrist = normalized_landmarks[0]
129
+ palm_vector = np.array([middle_mcp['x'] - wrist['x'], middle_mcp['y'] - wrist['y']])
130
+
131
+ # Angle with vertical axis
132
+ vertical = np.array([0, -1]) # Pointing up
133
+ dot_product = np.dot(palm_vector, vertical)
134
+ norms = np.linalg.norm(palm_vector) * np.linalg.norm(vertical)
135
+
136
+ if norms > 0:
137
+ cos_angle = dot_product / norms
138
+ cos_angle = np.clip(cos_angle, -1.0, 1.0)
139
+ angles['palm_orientation'] = math.degrees(math.acos(cos_angle))
140
+ else:
141
+ angles['palm_orientation'] = 0.0
142
+
143
+ return angles
144
+
145
+ def extract_distances(self, normalized_landmarks: List[Dict[str, float]]) -> Dict[str, float]:
146
+ """
147
+ Calculate distances between key landmarks.
148
+
149
+ Args:
150
+ normalized_landmarks: Normalized landmark coordinates
151
+
152
+ Returns:
153
+ Dictionary of calculated distances
154
+ """
155
+ distances = {}
156
+
157
+ # Distance between thumb tip and index tip
158
+ thumb_tip = normalized_landmarks[4]
159
+ index_tip = normalized_landmarks[8]
160
+ distances['thumb_index_distance'] = math.sqrt(
161
+ (thumb_tip['x'] - index_tip['x']) ** 2 +
162
+ (thumb_tip['y'] - index_tip['y']) ** 2
163
+ )
164
+
165
+ # Distance between index and middle finger tips
166
+ middle_tip = normalized_landmarks[12]
167
+ distances['index_middle_distance'] = math.sqrt(
168
+ (index_tip['x'] - middle_tip['x']) ** 2 +
169
+ (index_tip['y'] - middle_tip['y']) ** 2
170
+ )
171
+
172
+ # Distance from wrist to each fingertip
173
+ wrist = normalized_landmarks[0]
174
+ for i, finger_name in enumerate(['thumb', 'index', 'middle', 'ring', 'pinky']):
175
+ tip_idx = self.finger_tips[i]
176
+ tip = normalized_landmarks[tip_idx]
177
+ distances[f'wrist_{finger_name}_distance'] = math.sqrt(
178
+ (tip['x'] - wrist['x']) ** 2 +
179
+ (tip['y'] - wrist['y']) ** 2
180
+ )
181
+
182
+ return distances
183
+
184
+ def create_gesture_description(self, hand_landmarks: Dict[str, Any]) -> str:
185
+ """
186
+ Create a textual description of the gesture for OpenAI API.
187
+
188
+ Args:
189
+ hand_landmarks: Hand landmark data from MediaPipe
190
+
191
+ Returns:
192
+ String description of the gesture
193
+ """
194
+ normalized_landmarks = self.normalize_landmarks(hand_landmarks)
195
+ finger_states = self.extract_finger_states(normalized_landmarks)
196
+ angles = self.calculate_angles(normalized_landmarks)
197
+ distances = self.extract_distances(normalized_landmarks)
198
+
199
+ # Create description
200
+ description_parts = []
201
+
202
+ # Hand label
203
+ description_parts.append(f"Hand: {hand_landmarks['label']}")
204
+
205
+ # Finger states
206
+ extended_fingers = [name for name, extended in finger_states.items() if extended]
207
+ closed_fingers = [name for name, extended in finger_states.items() if not extended]
208
+
209
+ if extended_fingers:
210
+ description_parts.append(f"Extended fingers: {', '.join(extended_fingers)}")
211
+ if closed_fingers:
212
+ description_parts.append(f"Closed fingers: {', '.join(closed_fingers)}")
213
+
214
+ # Key measurements
215
+ description_parts.append(f"Thumb-index angle: {angles['thumb_index_angle']:.1f} degrees")
216
+ description_parts.append(f"Thumb-index distance: {distances['thumb_index_distance']:.3f}")
217
+ description_parts.append(f"Palm orientation: {angles['palm_orientation']:.1f} degrees")
218
+
219
+ # Special gesture patterns
220
+ if all(not extended for extended in finger_states.values()):
221
+ description_parts.append("Pattern: Closed fist")
222
+ elif all(extended for extended in finger_states.values()):
223
+ description_parts.append("Pattern: Open hand")
224
+ elif finger_states['index'] and not any(finger_states[f] for f in ['middle', 'ring', 'pinky']):
225
+ description_parts.append("Pattern: Pointing gesture")
226
+ elif finger_states['thumb'] and finger_states['index'] and distances['thumb_index_distance'] < 0.1:
227
+ description_parts.append("Pattern: Pinch gesture")
228
+
229
+ return "; ".join(description_parts)
230
+
231
+ def extract_features_vector(self, hand_landmarks: Dict[str, Any]) -> np.ndarray:
232
+ """
233
+ Extract numerical feature vector for machine learning models.
234
+
235
+ Args:
236
+ hand_landmarks: Hand landmark data from MediaPipe
237
+
238
+ Returns:
239
+ NumPy array of features
240
+ """
241
+ normalized_landmarks = self.normalize_landmarks(hand_landmarks)
242
+ finger_states = self.extract_finger_states(normalized_landmarks)
243
+ angles = self.calculate_angles(normalized_landmarks)
244
+ distances = self.extract_distances(normalized_landmarks)
245
+
246
+ # Create feature vector
247
+ features = []
248
+
249
+ # Finger states (5 features)
250
+ for finger in ['thumb', 'index', 'middle', 'ring', 'pinky']:
251
+ features.append(1.0 if finger_states[finger] else 0.0)
252
+
253
+ # Angles (2 features)
254
+ features.extend([
255
+ angles['thumb_index_angle'] / 180.0, # Normalize to 0-1
256
+ angles['palm_orientation'] / 180.0
257
+ ])
258
+
259
+ # Distances (7 features)
260
+ features.extend([
261
+ distances['thumb_index_distance'],
262
+ distances['index_middle_distance'],
263
+ distances['wrist_thumb_distance'],
264
+ distances['wrist_index_distance'],
265
+ distances['wrist_middle_distance'],
266
+ distances['wrist_ring_distance'],
267
+ distances['wrist_pinky_distance']
268
+ ])
269
+
270
+ return np.array(features)
src/src/hand_detector.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Hand Landmark Detection Module using MediaPipe
3
+ """
4
+
5
+ import cv2
6
+ import mediapipe as mp
7
+ import numpy as np
8
+ from typing import List, Tuple, Optional, Dict, Any
9
+
10
+
11
class HandDetector:
    """
    Thin wrapper around MediaPipe Hands: runs detection on BGR frames,
    draws annotated landmarks, and offers coordinate/bounding-box helpers.
    """

    def __init__(self,
                 static_image_mode: bool = False,
                 max_num_hands: int = 2,
                 min_detection_confidence: float = 0.5,
                 min_tracking_confidence: float = 0.3):
        """
        Initialize the HandDetector.

        Args:
            static_image_mode: Whether to treat input as static images
            max_num_hands: Maximum number of hands to detect
            min_detection_confidence: Minimum confidence for hand detection
                (default lowered to 0.5 for better detection)
            min_tracking_confidence: Minimum confidence for hand tracking
                (default lowered to 0.3 for better detection)
        """
        self.static_image_mode = static_image_mode
        self.max_num_hands = max_num_hands
        self.min_detection_confidence = min_detection_confidence
        self.min_tracking_confidence = min_tracking_confidence

        # MediaPipe solution objects (graph + drawing utilities).
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            static_image_mode=self.static_image_mode,
            max_num_hands=self.max_num_hands,
            min_detection_confidence=self.min_detection_confidence,
            min_tracking_confidence=self.min_tracking_confidence
        )
        self.mp_drawing = mp.solutions.drawing_utils
        self.mp_drawing_styles = mp.solutions.drawing_styles

    def detect_hands(self, image: np.ndarray) -> Tuple[np.ndarray, List[Dict[str, Any]]]:
        """
        Detect hands in the given image.

        Args:
            image: Input image as numpy array (BGR format)

        Returns:
            Tuple of (annotated_image, hand_landmarks_list)
        """
        # MediaPipe expects RGB; OpenCV frames arrive as BGR.
        results = self.hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        annotated = image.copy()
        detected: List[Dict[str, Any]] = []

        if results.multi_hand_landmarks:
            for idx, mp_hand in enumerate(results.multi_hand_landmarks):
                # Handedness (Left/Right) and its classifier score, if present.
                label, score = "Unknown", 0.0
                if results.multi_handedness:
                    cls = results.multi_handedness[idx].classification[0]
                    label, score = cls.label, cls.score

                # Overlay the skeleton on the annotated copy.
                self.mp_drawing.draw_landmarks(
                    annotated,
                    mp_hand,
                    self.mp_hands.HAND_CONNECTIONS,
                    self.mp_drawing_styles.get_default_hand_landmarks_style(),
                    self.mp_drawing_styles.get_default_hand_connections_style()
                )

                detected.append({
                    'label': label,
                    'landmarks': [{'x': p.x, 'y': p.y, 'z': p.z}
                                  for p in mp_hand.landmark],
                    'confidence': score,
                })

        return annotated, detected

    def get_landmark_positions(self, hand_landmarks: List[Dict[str, Any]],
                               image_width: int, image_height: int) -> List[Tuple[int, int]]:
        """
        Convert normalized landmarks to pixel coordinates.

        Args:
            hand_landmarks: List of hand landmark data
            image_width: Width of the image
            image_height: Height of the image

        Returns:
            List (per hand) of (x, y) pixel coordinates
        """
        return [
            [(int(p['x'] * image_width), int(p['y'] * image_height))
             for p in hand['landmarks']]
            for hand in hand_landmarks
        ]

    def get_bounding_box(self, hand_landmarks: Dict[str, Any],
                         image_width: int, image_height: int) -> Tuple[int, int, int, int]:
        """
        Get bounding box for detected hand.

        Args:
            hand_landmarks: Hand landmark data
            image_width: Width of the image
            image_height: Height of the image

        Returns:
            Tuple of (x_min, y_min, x_max, y_max) in pixels
        """
        xs = [p['x'] * image_width for p in hand_landmarks['landmarks']]
        ys = [p['y'] * image_height for p in hand_landmarks['landmarks']]
        return int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys))

    def is_hand_closed(self, hand_landmarks: Dict[str, Any]) -> bool:
        """
        Simple heuristic to determine if hand is closed (fist).

        A finger counts as closed when its tip sits below (greater y than)
        its PIP joint; the hand is a fist when at least 4 fingers are closed.

        Args:
            hand_landmarks: Hand landmark data

        Returns:
            Boolean indicating if hand appears closed
        """
        pts = hand_landmarks['landmarks']
        # (tip, pip) joint index pairs: thumb, index, middle, ring, pinky.
        tip_pip_pairs = zip((4, 8, 12, 16, 20), (3, 6, 10, 14, 18))
        lowered = sum(1 for tip, pip in tip_pip_pairs
                      if pts[tip]['y'] > pts[pip]['y'])
        return lowered >= 4

    def cleanup(self):
        """
        Clean up MediaPipe resources.
        """
        if hasattr(self, 'hands'):
            self.hands.close()
186
+
187
+
188
+ # Landmark indices for reference
189
# MediaPipe hand-landmark indices keyed by anatomical name (21 points total).
# The wrist is index 0; each finger then contributes four joints from base
# to tip, in the standard MediaPipe ordering.
HAND_LANDMARKS = {
    'WRIST': 0,
    # Thumb: carpometacarpal -> tip
    'THUMB_CMC': 1,
    'THUMB_MCP': 2,
    'THUMB_IP': 3,
    'THUMB_TIP': 4,
    # Index finger
    'INDEX_FINGER_MCP': 5,
    'INDEX_FINGER_PIP': 6,
    'INDEX_FINGER_DIP': 7,
    'INDEX_FINGER_TIP': 8,
    # Middle finger
    'MIDDLE_FINGER_MCP': 9,
    'MIDDLE_FINGER_PIP': 10,
    'MIDDLE_FINGER_DIP': 11,
    'MIDDLE_FINGER_TIP': 12,
    # Ring finger
    'RING_FINGER_MCP': 13,
    'RING_FINGER_PIP': 14,
    'RING_FINGER_DIP': 15,
    'RING_FINGER_TIP': 16,
    # Pinky
    'PINKY_MCP': 17,
    'PINKY_PIP': 18,
    'PINKY_DIP': 19,
    'PINKY_TIP': 20,
}
src/src/openai_classifier.py ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ OpenAI API Integration for Sign Language Classification
3
+ """
4
+
5
+ from openai import OpenAI
6
+ import os
7
+ from typing import List, Dict, Any, Optional
8
+ import json
9
+ import time
10
+ from dotenv import load_dotenv
11
+ from .fallback_classifier import FallbackSignLanguageClassifier
12
+
13
+ # Load environment variables
14
+ load_dotenv()
15
+
16
+
17
class SignLanguageClassifier:
    """
    A class for classifying sign language gestures using OpenAI API.

    Sends textual gesture descriptions to a chat-completion model and parses
    the JSON (or free-text) reply; falls back to a local pattern-based
    classifier when the API call fails.
    """

    def __init__(self, api_key: Optional[str] = None, model: str = "gpt-3.5-turbo"):
        """
        Initialize the SignLanguageClassifier.

        Args:
            api_key: OpenAI API key (if None, will use environment variable)
            model: OpenAI model to use for classification

        Raises:
            ValueError: If no API key is provided or found in the environment.
        """
        self.api_key = api_key or os.getenv('OPENAI_API_KEY')
        self.model = model

        if not self.api_key:
            raise ValueError("OpenAI API key not provided. Set OPENAI_API_KEY environment variable or pass api_key parameter.")

        # Initialize OpenAI client with new format
        self.client = OpenAI(api_key=self.api_key)

        # Rate limiting state (seconds between requests)
        self.last_request_time = 0
        self.min_request_interval = 1.0  # Minimum seconds between requests

        # Debug mode: prints prompts/responses to stdout
        self.debug = True

        # Local pattern-based classifier used when the API is unavailable
        self.fallback_classifier = FallbackSignLanguageClassifier()

        print(f"OpenAI classifier initialized with fallback support")

    def _rate_limit(self):
        """Sleep as needed so requests are at least min_request_interval apart."""
        current_time = time.time()
        time_since_last = current_time - self.last_request_time

        if time_since_last < self.min_request_interval:
            time.sleep(self.min_request_interval - time_since_last)

        self.last_request_time = time.time()

    def classify_gesture(self, gesture_description: str,
                         sign_language: str = "ASL",
                         context: Optional[str] = None) -> Dict[str, Any]:
        """
        Classify a gesture using OpenAI API.

        Args:
            gesture_description: Textual description of the gesture
            sign_language: Type of sign language (ASL, ISL, etc.)
            context: Additional context for classification

        Returns:
            Dictionary containing classification results. On API failure the
            fallback classifier's result is returned with 'fallback_used' set;
            if that also fails, a dict with 'success': False and error details.
        """
        self._rate_limit()

        prompt = self._create_classification_prompt(gesture_description, sign_language, context)

        if self.debug:
            print(f"\n=== OpenAI Classification Debug ===")
            print(f"Input gesture description: {gesture_description}")
            print(f"Prompt sent to OpenAI: {prompt}")

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": self._get_system_prompt(sign_language)},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=200,
                temperature=0.3,  # low temperature for decisive, repeatable answers
                top_p=0.9
            )

            response_content = response.choices[0].message.content

            if self.debug:
                print(f"OpenAI response: {response_content}")

            result = self._parse_response(response_content)
            result['raw_response'] = response_content
            result['success'] = True

            if self.debug:
                print(f"Parsed result: {result}")
                print("=== End Debug ===\n")

            return result

        except Exception as e:
            error_msg = str(e)
            if self.debug:
                print(f"OpenAI API Error: {error_msg}")
                print("Falling back to pattern-based classification...")

            # Use fallback classifier when OpenAI API fails
            try:
                fallback_result = self.fallback_classifier.classify_gesture(
                    gesture_description, sign_language, context
                )
                fallback_result['fallback_used'] = True
                fallback_result['openai_error'] = error_msg

                if self.debug:
                    print(f"Fallback result: {fallback_result}")
                    print("=== End Debug ===\n")

                return fallback_result

            except Exception as fallback_error:
                if self.debug:
                    print(f"Fallback also failed: {str(fallback_error)}")
                    print("=== End Debug ===\n")

                return {
                    'success': False,
                    'error': error_msg,
                    'fallback_error': str(fallback_error),
                    'letter': None,
                    'word': None,
                    'confidence': 0.0,
                    'description': None
                }

    def classify_sequence(self, gesture_descriptions: List[str],
                          sign_language: str = "ASL") -> Dict[str, Any]:
        """
        Classify a sequence of gestures to form words or sentences.

        Args:
            gesture_descriptions: List of gesture descriptions
            sign_language: Type of sign language

        Returns:
            Dictionary containing sequence classification results (falls back
            to the local classifier on API failure, same as classify_gesture).
        """
        self._rate_limit()

        prompt = self._create_sequence_prompt(gesture_descriptions, sign_language)

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": self._get_sequence_system_prompt(sign_language)},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=300,
                temperature=0.3,
                top_p=0.9
            )

            result = self._parse_sequence_response(response.choices[0].message.content)
            result['raw_response'] = response.choices[0].message.content
            result['success'] = True

            return result

        except Exception as e:
            # Use fallback for sequence classification too
            try:
                fallback_result = self.fallback_classifier.classify_sequence(
                    gesture_descriptions, sign_language
                )
                fallback_result['fallback_used'] = True
                fallback_result['openai_error'] = str(e)
                return fallback_result

            except Exception as fallback_error:
                return {
                    'success': False,
                    'error': str(e),
                    'fallback_error': str(fallback_error),
                    'word': None,
                    'sentence': None,
                    'confidence': 0.0
                }

    def _get_system_prompt(self, sign_language: str) -> str:
        """Get system prompt for gesture classification."""
        return f"""You are an expert in {sign_language} (American Sign Language) recognition.
Your task is to provide ONE CLEAR PREDICTION for each hand gesture.

PRIORITY ORDER:
1. If it's a complete word sign (like "HELLO", "HUNGRY", "THANK YOU"), identify the WORD
2. If it's a letter/number sign, identify the LETTER or NUMBER
3. If uncertain, provide your best single guess

Respond in JSON format:
{{
    "letter": "A" or null,
    "word": "HUNGRY" or null,
    "confidence": 0.85,
    "description": "Brief explanation"
}}

IMPORTANT RULES:
- Provide either a letter OR a word, not both
- Words take priority over letters
- Be decisive - give your best single prediction
- Common words: HELLO, HUNGRY, THANK YOU, PLEASE, SORRY, YES, NO, I, YOU, LOVE, etc.
- Letters: A-Z, Numbers: 0-9
- Confidence should reflect your certainty (0.1 = very uncertain, 0.9 = very certain)

Focus on the most likely single interpretation of the gesture."""

    def _get_sequence_system_prompt(self, sign_language: str) -> str:
        """Get system prompt for sequence classification."""
        return f"""You are an expert in {sign_language} recognition specializing in interpreting sequences of gestures.
Your task is to analyze a sequence of hand gestures and determine if they form a word or sentence.

Respond in JSON format:
{{
    "word": "HELLO" or null,
    "sentence": "HELLO WORLD" or null,
    "confidence": 0.85,
    "individual_letters": ["H", "E", "L", "L", "O"]
}}

Consider:
- Sequential letter spelling
- Common {sign_language} words and phrases
- Context and flow between gestures"""

    def _create_classification_prompt(self, gesture_description: str,
                                      sign_language: str, context: Optional[str]) -> str:
        """Create enhanced prompt for single gesture classification."""
        prompt = f"""You are an expert ASL (American Sign Language) interpreter. Analyze this hand gesture and provide ONE CLEAR PREDICTION.

GESTURE DATA:
{gesture_description}

TASK: Identify what this gesture represents. Respond with EXACTLY ONE of these:
- A single letter (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z)
- A single number (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
- A complete word (HELLO, HUNGRY, THANK YOU, PLEASE, SORRY, YES, NO, I, YOU, LOVE, HELP, MORE, WATER, EAT, DRINK, etc.)

PRIORITY: If this could be a word sign, choose the WORD. If it's clearly a letter/number, choose that.

COMMON ASL PATTERNS:
- Closed fist = A, S, or numbers
- Open hand = 5, HELLO, or STOP
- Pointing = 1, I, or YOU
- Pinch gesture = F, 9, or SMALL

"""

        if context:
            prompt += f"Context: {context}\n\n"

        prompt += """Respond in this EXACT JSON format:
{
    "letter": "A" or null,
    "word": "HELLO" or null,
    "confidence": 0.85,
    "description": "Brief explanation"
}

Be decisive and confident in your single prediction."""

        return prompt

    def _create_sequence_prompt(self, gesture_descriptions: List[str],
                                sign_language: str) -> str:
        """Create prompt for gesture sequence classification."""
        prompt = f"""Analyze this sequence of {sign_language} hand gestures:

"""

        for i, description in enumerate(gesture_descriptions, 1):
            prompt += f"Gesture {i}: {description}\n"

        prompt += f"""
What word or sentence do these {sign_language} gestures spell out when combined?
Consider the sequence and flow of the gestures."""

        return prompt

    def _parse_response(self, response_text: str) -> Dict[str, Any]:
        """Parse OpenAI response for single gesture classification.

        Tries the JSON object embedded in the reply first, then falls back
        to heuristic text parsing.
        """
        try:
            if '{' in response_text and '}' in response_text:
                json_start = response_text.find('{')
                json_end = response_text.rfind('}') + 1
                json_str = response_text[json_start:json_end]
                result = json.loads(json_str)

                # Ensure required fields exist
                return {
                    'letter': result.get('letter'),
                    'word': result.get('word'),
                    'confidence': float(result.get('confidence', 0.0)),
                    'description': result.get('description', '')
                }
            else:
                return self._parse_text_response(response_text)

        except (json.JSONDecodeError, ValueError):
            return self._parse_text_response(response_text)

    def _parse_sequence_response(self, response_text: str) -> Dict[str, Any]:
        """Parse OpenAI response for sequence classification."""
        try:
            if '{' in response_text and '}' in response_text:
                json_start = response_text.find('{')
                json_end = response_text.rfind('}') + 1
                json_str = response_text[json_start:json_end]
                result = json.loads(json_str)

                return {
                    'word': result.get('word'),
                    'sentence': result.get('sentence'),
                    'confidence': float(result.get('confidence', 0.0)),
                    'individual_letters': result.get('individual_letters', [])
                }
            else:
                return self._parse_sequence_text_response(response_text)

        except (json.JSONDecodeError, ValueError):
            return self._parse_sequence_text_response(response_text)

    def _parse_text_response(self, response_text: str) -> Dict[str, Any]:
        """Enhanced fallback text parsing for single gesture.

        Scans the reply for a known ASL word (priority), then for
        'letter: X' / 'word: Y' patterns when no word was matched.
        """
        # BUGFIX: `re` is not imported at module level and was previously
        # only imported inside the no-word branch while being used outside
        # it — import up front so every path is safe.
        import re

        response_lower = response_text.lower()

        # Common ASL words to look for (substring match, first hit wins)
        common_words = ['hello', 'hungry', 'thank you', 'please', 'sorry', 'yes', 'no',
                        'i', 'you', 'love', 'help', 'more', 'water', 'eat', 'drink',
                        'good', 'bad', 'happy', 'sad', 'stop', 'go', 'come', 'home']

        # Look for words first (priority)
        word = None
        for w in common_words:
            if w in response_lower:
                word = w.upper()
                break

        # Look for explicit letter/word patterns only if no word was found.
        # BUGFIX: the original reset `word = None` here, unconditionally
        # discarding any common-word match made above.
        letter = None
        if not word:
            letter_match = re.search(r'letter\s*[:\-]?\s*([a-z])', response_lower)
            if letter_match:
                letter = letter_match.group(1).upper()

            word_match = re.search(r'word\s*[:\-]?\s*([a-z]+)', response_lower)
            if word_match:
                word = word_match.group(1).upper()

        return {
            'letter': letter,
            'word': word,
            'confidence': 0.5,  # Default confidence for text parsing
            'description': response_text[:100]  # First 100 chars
        }

    def _parse_sequence_text_response(self, response_text: str) -> Dict[str, Any]:
        """Fallback text parsing for sequence (no fields recoverable)."""
        return {
            'word': None,
            'sentence': None,
            'confidence': 0.5,
            'individual_letters': []
        }
src/src/output_handler.py ADDED
@@ -0,0 +1,391 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Output Display and Speech Synthesis Module
3
+ Handles text display and text-to-speech functionality
4
+ """
5
+
6
+ import pyttsx3
7
+ import threading
8
+ import time
9
+ import os
10
+ from typing import List, Dict, Any, Optional, Callable
11
+ from queue import Queue, Empty
12
+ import json
13
+ from datetime import datetime
14
+
15
+
16
class OutputHandler:
    """
    Handles text display and speech synthesis for sign language detection results.

    Results are formatted for display, forwarded to registered display
    callbacks, optionally spoken through a background pyttsx3 worker thread,
    and appended to a JSON + plain-text transcript on disk.
    """

    def __init__(self,
                 enable_speech: bool = True,
                 speech_rate: int = 150,
                 speech_volume: float = 0.9,
                 save_transcript: bool = True,
                 transcript_file: str = "sign_language_transcript.txt"):
        """
        Initialize the OutputHandler.

        Args:
            enable_speech: Whether to enable text-to-speech
            speech_rate: Speech rate (words per minute)
            speech_volume: Speech volume (0.0 to 1.0)
            save_transcript: Whether to save transcript to file
            transcript_file: Path to transcript file
        """
        self.enable_speech = enable_speech
        self.speech_rate = speech_rate
        self.speech_volume = speech_volume
        self.save_transcript = save_transcript
        self.transcript_file = transcript_file

        # TTS engine state; a daemon worker thread drains speech_queue.
        self.tts_engine = None
        self.tts_thread = None
        self.speech_queue = Queue()
        self.is_speaking = False

        # Transcript storage
        self.transcript = []
        self.current_session_start = datetime.now()

        # Display callbacks
        self.display_callbacks = []

        # Initialize TTS if enabled
        if self.enable_speech:
            self._initialize_tts()

    def _initialize_tts(self) -> bool:
        """
        Initialize the text-to-speech engine and start the worker thread.

        On failure, speech is disabled for the rest of the session.

        Returns:
            True if initialized successfully, False otherwise
        """
        try:
            self.tts_engine = pyttsx3.init()

            # Set properties
            self.tts_engine.setProperty('rate', self.speech_rate)
            self.tts_engine.setProperty('volume', self.speech_volume)

            # Get available voices
            voices = self.tts_engine.getProperty('voices')
            if voices:
                # Try to use a female voice if available
                for voice in voices:
                    if 'female' in voice.name.lower() or 'woman' in voice.name.lower():
                        self.tts_engine.setProperty('voice', voice.id)
                        break
                else:
                    # Use first available voice
                    self.tts_engine.setProperty('voice', voices[0].id)

            # Start TTS thread (daemon so it never blocks interpreter exit)
            self.tts_thread = threading.Thread(target=self._tts_worker, daemon=True)
            self.tts_thread.start()

            print("Text-to-speech initialized successfully")
            return True

        except Exception as e:
            print(f"Error initializing TTS: {e}")
            self.enable_speech = False
            return False

    def _tts_worker(self):
        """TTS worker thread: drain the speech queue until a None sentinel."""
        while True:
            try:
                text = self.speech_queue.get(timeout=1.0)
                if text is None:  # Shutdown signal
                    break

                self.is_speaking = True
                self.tts_engine.say(text)
                self.tts_engine.runAndWait()
                self.is_speaking = False

            except Empty:
                # Timeout just loops again so the thread stays responsive
                # to the shutdown sentinel.
                continue
            except Exception as e:
                print(f"Error in TTS worker: {e}")
                self.is_speaking = False

    def add_display_callback(self, callback: Callable):
        """
        Add a callback function for display updates.

        Args:
            callback: Called as callback(display_text, result_dict) whenever
                a detection or sequence result is displayed.
        """
        self.display_callbacks.append(callback)

    def display_detection(self, detection: Dict[str, Any], speak: bool = True):
        """
        Display and optionally speak a gesture detection result.

        Unsuccessful classifications are silently ignored.

        Args:
            detection: Detection result dictionary
            speak: Whether to speak the result
        """
        classification = detection.get('classification', {})
        if not classification.get('success', False):
            return

        # Format display text
        display_text = self._format_detection_text(detection)

        # Add to transcript
        if self.save_transcript:
            self._add_to_transcript(detection, display_text)

        # Call display callbacks; a failing callback must not break the rest.
        for callback in self.display_callbacks:
            try:
                callback(display_text, detection)
            except Exception as e:
                print(f"Error in display callback: {e}")

        # Speak if enabled and requested
        if speak and self.enable_speech:
            speech_text = self._format_speech_text(detection)
            self.speak(speech_text)

        # Print to console
        print(f"[{datetime.now().strftime('%H:%M:%S')}] {display_text}")

    def display_sequence(self, sequence_result: Dict[str, Any], speak: bool = True):
        """
        Display and optionally speak a gesture sequence result.

        Args:
            sequence_result: Sequence classification result
            speak: Whether to speak the result
        """
        if not sequence_result.get('success', False):
            return

        # Format display text
        display_text = self._format_sequence_text(sequence_result)

        # Add to transcript
        if self.save_transcript:
            self._add_sequence_to_transcript(sequence_result, display_text)

        # Call display callbacks
        for callback in self.display_callbacks:
            try:
                callback(display_text, sequence_result)
            except Exception as e:
                print(f"Error in display callback: {e}")

        # Speak if enabled and requested
        if speak and self.enable_speech:
            speech_text = self._format_sequence_speech_text(sequence_result)
            self.speak(speech_text)

        # Print to console
        print(f"[{datetime.now().strftime('%H:%M:%S')}] SEQUENCE: {display_text}")

    def speak(self, text: str):
        """
        Add text to the speech queue.

        NOTE(review): text is dropped when the engine is already speaking;
        this looks deliberate (avoids a speech backlog during rapid
        detections) but callers should not rely on every call being voiced.

        Args:
            text: Text to speak
        """
        if self.enable_speech and not self.is_speaking:
            self.speech_queue.put(text)

    def _format_detection_text(self, detection: Dict[str, Any]) -> str:
        """Format a single detection result for on-screen display."""
        classification = detection.get('classification', {})
        hand_label = detection.get('hand_label', 'Unknown')

        parts = [f"{hand_label} hand:"]

        if classification.get('letter'):
            parts.append(f"Letter '{classification['letter']}'")

        if classification.get('word'):
            parts.append(f"Word '{classification['word']}'")

        confidence = classification.get('confidence', 0.0)
        if confidence > 0:
            parts.append(f"({confidence:.1%} confidence)")

        return " ".join(parts)

    def _format_sequence_text(self, sequence_result: Dict[str, Any]) -> str:
        """Format a sequence result for on-screen display."""
        parts = []

        if sequence_result.get('word'):
            parts.append(f"Word: '{sequence_result['word']}'")

        if sequence_result.get('sentence'):
            parts.append(f"Sentence: '{sequence_result['sentence']}'")

        if sequence_result.get('individual_letters'):
            letters = " ".join(sequence_result['individual_letters'])
            parts.append(f"Letters: {letters}")

        confidence = sequence_result.get('confidence', 0.0)
        if confidence > 0:
            parts.append(f"({confidence:.1%} confidence)")

        return " | ".join(parts)

    def _format_speech_text(self, detection: Dict[str, Any]) -> str:
        """Format a detection result for speech (word beats letter)."""
        classification = detection.get('classification', {})

        if classification.get('word'):
            return classification['word']
        elif classification.get('letter'):
            return f"Letter {classification['letter']}"
        else:
            return "Gesture detected"

    def _format_sequence_speech_text(self, sequence_result: Dict[str, Any]) -> str:
        """Format a sequence result for speech (sentence beats word)."""
        if sequence_result.get('sentence'):
            return sequence_result['sentence']
        elif sequence_result.get('word'):
            return sequence_result['word']
        else:
            return "Sequence detected"

    def _add_to_transcript(self, detection: Dict[str, Any], display_text: str):
        """Append a detection entry to the in-memory transcript."""
        transcript_entry = {
            'timestamp': datetime.now().isoformat(),
            'type': 'detection',
            'display_text': display_text,
            'detection': detection
        }
        self.transcript.append(transcript_entry)

        # Save to file periodically (every 10 entries) to bound I/O cost.
        if len(self.transcript) % 10 == 0:
            self._save_transcript()

    def _add_sequence_to_transcript(self, sequence_result: Dict[str, Any], display_text: str):
        """Append a sequence entry to the transcript and flush immediately."""
        transcript_entry = {
            'timestamp': datetime.now().isoformat(),
            'type': 'sequence',
            'display_text': display_text,
            'sequence_result': sequence_result
        }
        self.transcript.append(transcript_entry)
        self._save_transcript()

    def _save_transcript(self):
        """Save the transcript to disk as both JSON and readable text."""
        if not self.save_transcript:
            return

        try:
            # Create transcript data
            transcript_data = {
                'session_start': self.current_session_start.isoformat(),
                'last_updated': datetime.now().isoformat(),
                'entries': self.transcript
            }

            # Save as JSON; explicit UTF-8 keeps output portable across OSes.
            json_file = os.path.splitext(self.transcript_file)[0] + '.json'
            with open(json_file, 'w', encoding='utf-8') as f:
                json.dump(transcript_data, f, indent=2)

            # Save as readable text
            with open(self.transcript_file, 'w', encoding='utf-8') as f:
                f.write(f"Sign Language Detection Transcript\n")
                f.write(f"Session started: {self.current_session_start.strftime('%Y-%m-%d %H:%M:%S')}\n")
                f.write(f"Last updated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
                f.write("=" * 50 + "\n\n")

                for entry in self.transcript:
                    timestamp = datetime.fromisoformat(entry['timestamp'])
                    f.write(f"[{timestamp.strftime('%H:%M:%S')}] {entry['display_text']}\n")

        except Exception as e:
            print(f"Error saving transcript: {e}")

    def get_transcript_summary(self) -> Dict[str, Any]:
        """
        Get summary of current transcript.

        Returns:
            Dictionary containing entry counts, deduplicated detected
            words/letters, and session duration in seconds.
        """
        if not self.transcript:
            return {'total_entries': 0, 'detections': 0, 'sequences': 0}

        detections = sum(1 for entry in self.transcript if entry['type'] == 'detection')
        sequences = sum(1 for entry in self.transcript if entry['type'] == 'sequence')

        # Extract detected words and letters
        detected_words = []
        detected_letters = []

        for entry in self.transcript:
            if entry['type'] == 'detection':
                classification = entry.get('detection', {}).get('classification', {})
                if classification.get('word'):
                    detected_words.append(classification['word'])
                if classification.get('letter'):
                    detected_letters.append(classification['letter'])
            elif entry['type'] == 'sequence':
                sequence_result = entry.get('sequence_result', {})
                if sequence_result.get('word'):
                    detected_words.append(sequence_result['word'])
                if sequence_result.get('sentence'):
                    detected_words.extend(sequence_result['sentence'].split())

        return {
            'total_entries': len(self.transcript),
            'detections': detections,
            'sequences': sequences,
            'detected_words': list(set(detected_words)),
            'detected_letters': list(set(detected_letters)),
            'session_duration': (datetime.now() - self.current_session_start).total_seconds()
        }

    def clear_transcript(self):
        """Clear the current transcript and restart the session clock."""
        self.transcript = []
        self.current_session_start = datetime.now()
        print("Transcript cleared")

    def set_speech_enabled(self, enabled: bool):
        """Enable or disable speech synthesis."""
        self.enable_speech = enabled
        if not enabled and self.is_speaking:
            # Stop current speech
            if self.tts_engine:
                self.tts_engine.stop()

    def cleanup(self):
        """Clean up resources: flush transcript and shut down the TTS thread."""
        # Save final transcript
        if self.save_transcript and self.transcript:
            self._save_transcript()

        # Stop TTS
        if self.tts_thread:
            self.speech_queue.put(None)  # Shutdown signal
            self.tts_thread.join(timeout=2.0)

        if self.tts_engine:
            try:
                self.tts_engine.stop()
            except Exception:
                # Engine may already be stopped/destroyed; best-effort only.
                pass
src/src/prediction_logger.py ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Comprehensive Prediction Logging System
3
+
4
+ This module provides detailed logging for the sign language prediction pipeline
5
+ to help identify where predictions are failing and track performance.
6
+ """
7
+
8
+ import json
9
+ import time
10
+ from datetime import datetime
11
+ from typing import Dict, Any, List, Optional
12
+ import os
13
+
14
+
15
class PredictionLogger:
    """
    Comprehensive logging system for sign language predictions.

    Each pipeline stage (hand detection, gesture extraction, AI
    classification, fallback classification, final prediction) appends a
    structured entry to an in-memory list that can be summarized and
    persisted to a JSON file.
    """

    def __init__(self, log_file: str = "prediction_logs.json", debug: bool = True):
        """
        Initialize the prediction logger.

        Args:
            log_file: Path to the log file
            debug: Whether to print debug information
        """
        self.log_file = log_file
        self.debug = debug
        # Session id is second-granular; two loggers created within the same
        # second share an id.
        self.session_id = f"session_{int(time.time())}"
        self.logs = []

        if self.debug:
            print(f"🔍 Prediction Logger initialized - Session: {self.session_id}")

    def log_hand_detection(self, image_info: Dict[str, Any], hands_detected: int,
                          detection_confidence: Optional[List[float]] = None) -> str:
        """
        Log hand detection results.

        Args:
            image_info: Information about the processed image
            hands_detected: Number of hands detected
            detection_confidence: List of confidence scores for detected hands

        Returns:
            Log entry ID
        """
        # NOTE(review): ids are millisecond-granular; entries logged within
        # the same millisecond would collide — acceptable for debugging use.
        log_entry = {
            "id": f"hand_det_{int(time.time() * 1000)}",
            "timestamp": datetime.now().isoformat(),
            "session_id": self.session_id,
            "stage": "hand_detection",
            "image_info": image_info,
            "hands_detected": hands_detected,
            "detection_confidence": detection_confidence or [],
            "success": hands_detected > 0
        }

        self.logs.append(log_entry)

        if self.debug:
            status = "✅" if hands_detected > 0 else "❌"
            print(f"{status} Hand Detection: {hands_detected} hands detected")
            if detection_confidence:
                for i, conf in enumerate(detection_confidence):
                    print(f"   Hand {i+1}: {conf:.1%} confidence")

        return log_entry["id"]

    def log_gesture_extraction(self, hand_data: Dict[str, Any],
                              gesture_description: str) -> str:
        """
        Log gesture extraction results.

        Args:
            hand_data: Hand landmark data
            gesture_description: Generated gesture description

        Returns:
            Log entry ID
        """
        log_entry = {
            "id": f"gest_ext_{int(time.time() * 1000)}",
            "timestamp": datetime.now().isoformat(),
            "session_id": self.session_id,
            "stage": "gesture_extraction",
            "hand_label": hand_data.get('label', 'Unknown'),
            "hand_confidence": hand_data.get('confidence', 0.0),
            "gesture_description": gesture_description,
            "description_length": len(gesture_description),
            # An empty description means extraction produced nothing useful.
            "success": len(gesture_description) > 0
        }

        self.logs.append(log_entry)

        if self.debug:
            print(f"✅ Gesture Extraction: {len(gesture_description)} chars description")
            print(f"   Hand: {hand_data.get('label', 'Unknown')} ({hand_data.get('confidence', 0):.1%})")

        return log_entry["id"]

    def log_ai_classification(self, gesture_description: str, ai_provider: str,
                             response: Dict[str, Any], success: bool,
                             error_message: Optional[str] = None) -> str:
        """
        Log AI classification attempts.

        Args:
            gesture_description: Input gesture description
            ai_provider: AI provider used (gemini, openai, etc.)
            response: AI response data
            success: Whether the classification succeeded
            error_message: Error message if failed

        Returns:
            Log entry ID
        """
        log_entry = {
            "id": f"ai_class_{int(time.time() * 1000)}",
            "timestamp": datetime.now().isoformat(),
            "session_id": self.session_id,
            "stage": "ai_classification",
            "ai_provider": ai_provider,
            "input_description": gesture_description,
            "response": response,
            "success": success,
            "error_message": error_message,
            # A word prediction takes precedence over a letter prediction.
            "prediction": response.get('word') or response.get('letter') if success else None,
            "confidence": response.get('confidence', 0.0) if success else 0.0
        }

        self.logs.append(log_entry)

        if self.debug:
            status = "✅" if success else "❌"
            if success:
                prediction = response.get('word') or response.get('letter') or 'No prediction'
                confidence = response.get('confidence', 0.0)
                print(f"{status} AI Classification ({ai_provider}): {prediction} ({confidence:.1%})")
            else:
                print(f"{status} AI Classification ({ai_provider}) Failed: {error_message}")

        return log_entry["id"]

    def log_fallback_classification(self, gesture_description: str,
                                   response: Dict[str, Any], success: bool) -> str:
        """
        Log fallback classification results.

        Args:
            gesture_description: Input gesture description
            response: Fallback classifier response
            success: Whether the classification succeeded

        Returns:
            Log entry ID
        """
        log_entry = {
            "id": f"fallback_{int(time.time() * 1000)}",
            "timestamp": datetime.now().isoformat(),
            "session_id": self.session_id,
            "stage": "fallback_classification",
            "input_description": gesture_description,
            "response": response,
            "success": success,
            "prediction": response.get('word') or response.get('letter') if success else None,
            "confidence": response.get('confidence', 0.0) if success else 0.0
        }

        self.logs.append(log_entry)

        if self.debug:
            status = "✅" if success else "❌"
            if success:
                prediction = response.get('word') or response.get('letter') or 'No prediction'
                confidence = response.get('confidence', 0.0)
                print(f"{status} Fallback Classification: {prediction} ({confidence:.1%})")
            else:
                print(f"{status} Fallback Classification Failed")

        return log_entry["id"]

    def log_final_prediction(self, file_path: str, final_prediction: str,
                            confidence: float, method_used: str,
                            processing_time: float) -> str:
        """
        Log final prediction results.

        Args:
            file_path: Path to the processed file
            final_prediction: Final prediction result
            confidence: Prediction confidence
            method_used: Method that provided the final prediction
            processing_time: Total processing time in seconds

        Returns:
            Log entry ID
        """
        log_entry = {
            "id": f"final_{int(time.time() * 1000)}",
            "timestamp": datetime.now().isoformat(),
            "session_id": self.session_id,
            "stage": "final_prediction",
            "file_path": file_path,
            "final_prediction": final_prediction,
            "confidence": confidence,
            "method_used": method_used,
            "processing_time": processing_time,
            # The literal "No prediction" sentinel counts as a failure.
            "success": final_prediction is not None and final_prediction != "No prediction"
        }

        self.logs.append(log_entry)

        if self.debug:
            status = "🎯" if log_entry["success"] else "❌"
            print(f"{status} Final Prediction: {final_prediction} ({confidence:.1%}) via {method_used}")
            print(f"   Processing time: {processing_time:.2f}s")

        return log_entry["id"]

    def get_session_summary(self) -> Dict[str, Any]:
        """
        Get a summary of the current session.

        Returns:
            Session summary statistics (counts and success rates per stage;
            rates are 0 when the corresponding stage never ran).
        """
        total_predictions = len([log for log in self.logs if log["stage"] == "final_prediction"])
        successful_predictions = len([log for log in self.logs
                                    if log["stage"] == "final_prediction" and log["success"]])

        hand_detections = len([log for log in self.logs if log["stage"] == "hand_detection"])
        successful_hand_detections = len([log for log in self.logs
                                        if log["stage"] == "hand_detection" and log["success"]])

        ai_attempts = len([log for log in self.logs if log["stage"] == "ai_classification"])
        successful_ai = len([log for log in self.logs
                           if log["stage"] == "ai_classification" and log["success"]])

        fallback_attempts = len([log for log in self.logs if log["stage"] == "fallback_classification"])

        summary = {
            "session_id": self.session_id,
            "total_files_processed": total_predictions,
            "successful_predictions": successful_predictions,
            "prediction_success_rate": successful_predictions / total_predictions if total_predictions > 0 else 0,
            "hand_detection_success_rate": successful_hand_detections / hand_detections if hand_detections > 0 else 0,
            "ai_classification_success_rate": successful_ai / ai_attempts if ai_attempts > 0 else 0,
            "fallback_usage_rate": fallback_attempts / total_predictions if total_predictions > 0 else 0,
            "total_logs": len(self.logs)
        }

        return summary

    def save_logs(self) -> bool:
        """
        Save logs to file.

        Returns:
            True if successful, False otherwise
        """
        try:
            with open(self.log_file, 'w') as f:
                json.dump({
                    "session_summary": self.get_session_summary(),
                    "logs": self.logs
                }, f, indent=2)

            if self.debug:
                print(f"💾 Logs saved to {self.log_file}")

            return True
        except Exception as e:
            if self.debug:
                print(f"❌ Failed to save logs: {e}")
            return False

    def print_summary(self):
        """Print a summary of the current session."""
        summary = self.get_session_summary()

        print("\n" + "="*50)
        print("📊 PREDICTION SESSION SUMMARY")
        print("="*50)
        print(f"Session ID: {summary['session_id']}")
        print(f"Files Processed: {summary['total_files_processed']}")
        print(f"Successful Predictions: {summary['successful_predictions']}")
        print(f"Prediction Success Rate: {summary['prediction_success_rate']:.1%}")
        print(f"Hand Detection Success Rate: {summary['hand_detection_success_rate']:.1%}")
        print(f"AI Classification Success Rate: {summary['ai_classification_success_rate']:.1%}")
        print(f"Fallback Usage Rate: {summary['fallback_usage_rate']:.1%}")
        print(f"Total Log Entries: {summary['total_logs']}")
        print("="*50)
+ print("="*50)
src/src/visualization_utils.py ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Visualization utilities for enhanced result display
3
+ """
4
+
5
+ import cv2
6
+ import numpy as np
7
+ import matplotlib.pyplot as plt
8
+ import matplotlib.patches as patches
9
+ from matplotlib.patches import Circle
10
+ import plotly.graph_objects as go
11
+ import plotly.express as px
12
+ from plotly.subplots import make_subplots
13
+ from typing import List, Dict, Any, Tuple
14
+ import pandas as pd
15
+
16
+
17
class HandLandmarkVisualizer:
    """
    Enhanced visualization for hand landmarks and gesture analysis.

    Draws color-coded 2D landmark overlays with OpenCV and builds 3D /
    radar / gauge figures with Plotly. Landmark indexing follows the
    21-point MediaPipe hand model (0 = wrist, 1-4 = thumb, ... 17-20 = pinky).
    """

    def __init__(self):
        """Initialize the visualizer with connection topology and colors."""
        # MediaPipe hand landmark connections
        self.hand_connections = [
            (0, 1), (1, 2), (2, 3), (3, 4),  # Thumb
            (0, 5), (5, 6), (6, 7), (7, 8),  # Index finger
            (0, 9), (9, 10), (10, 11), (11, 12),  # Middle finger
            (0, 13), (13, 14), (14, 15), (15, 16),  # Ring finger
            (0, 17), (17, 18), (18, 19), (19, 20),  # Pinky
            (5, 9), (9, 13), (13, 17)  # Palm connections
        ]

        # Color scheme for different parts (BGR/RGB triples as used by cv2)
        self.colors = {
            'thumb': (255, 0, 0),      # Red
            'index': (0, 255, 0),      # Green
            'middle': (0, 0, 255),     # Blue
            'ring': (255, 255, 0),     # Yellow
            'pinky': (255, 0, 255),    # Magenta
            'palm': (0, 255, 255),     # Cyan
            'wrist': (128, 128, 128)   # Gray
        }

        # Finger landmark ranges (which landmark indices belong to each finger)
        self.finger_ranges = {
            'thumb': range(1, 5),
            'index': range(5, 9),
            'middle': range(9, 13),
            'ring': range(13, 17),
            'pinky': range(17, 21),
            'wrist': [0]
        }

    def draw_enhanced_landmarks(self, image: np.ndarray,
                               hand_landmarks: List[Dict[str, Any]]) -> np.ndarray:
        """
        Draw enhanced hand landmarks with color coding and connections.

        The input image is not modified; a copy is annotated and returned.

        Args:
            image: Input image
            hand_landmarks: List of hand landmark data; each dict is expected
                to have 'landmarks' (normalized x/y/z dicts) and 'label'

        Returns:
            Image with enhanced landmarks drawn
        """
        annotated_image = image.copy()
        height, width = image.shape[:2]

        for hand_data in hand_landmarks:
            landmarks = hand_data['landmarks']
            hand_label = hand_data['label']

            # Convert normalized coordinates to pixel coordinates
            landmark_points = []
            for landmark in landmarks:
                x = int(landmark['x'] * width)
                y = int(landmark['y'] * height)
                landmark_points.append((x, y))

            # Draw connections
            for connection in self.hand_connections:
                start_idx, end_idx = connection
                start_point = landmark_points[start_idx]
                end_point = landmark_points[end_idx]

                # Determine color based on finger
                color = self._get_connection_color(start_idx, end_idx)
                cv2.line(annotated_image, start_point, end_point, color, 2)

            # Draw landmark points (filled dot plus a white outline ring)
            for i, point in enumerate(landmark_points):
                color = self._get_landmark_color(i)
                cv2.circle(annotated_image, point, 4, color, -1)
                cv2.circle(annotated_image, point, 6, (255, 255, 255), 1)

            # Add hand label near the wrist, drawn twice (white over black)
            # for readability on any background.
            if landmark_points:
                label_pos = (landmark_points[0][0] - 50, landmark_points[0][1] - 20)
                cv2.putText(annotated_image, f"{hand_label} Hand", label_pos,
                           cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
                cv2.putText(annotated_image, f"{hand_label} Hand", label_pos,
                           cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 1)

        return annotated_image

    def _get_landmark_color(self, landmark_idx: int) -> Tuple[int, int, int]:
        """Get color for a specific landmark."""
        for finger, indices in self.finger_ranges.items():
            if landmark_idx in indices:
                return self.colors[finger]
        return (128, 128, 128)  # Default gray

    def _get_connection_color(self, start_idx: int, end_idx: int) -> Tuple[int, int, int]:
        """Get color for a connection between landmarks."""
        # Use the color of the finger that both landmarks belong to
        for finger, indices in self.finger_ranges.items():
            if start_idx in indices and end_idx in indices:
                return self.colors[finger]
        return self.colors['palm']  # Default to palm color

    def create_3d_hand_plot(self, hand_landmarks: Dict[str, Any]) -> go.Figure:
        """
        Create a 3D visualization of hand landmarks.

        Args:
            hand_landmarks: Hand landmark data ('landmarks' list + 'label')

        Returns:
            Plotly 3D figure
        """
        landmarks = hand_landmarks['landmarks']

        # Extract coordinates
        x_coords = [landmark['x'] for landmark in landmarks]
        y_coords = [-landmark['y'] for landmark in landmarks]  # Flip Y for proper orientation
        z_coords = [landmark['z'] for landmark in landmarks]

        # Create 3D scatter plot
        fig = go.Figure()

        # Add landmark points, colored by depth (z)
        fig.add_trace(go.Scatter3d(
            x=x_coords,
            y=y_coords,
            z=z_coords,
            mode='markers',
            marker=dict(
                size=8,
                color=z_coords,
                colorscale='Viridis',
                showscale=True,
                colorbar=dict(title="Depth")
            ),
            text=[f"Landmark {i}" for i in range(len(landmarks))],
            name="Hand Landmarks"
        ))

        # Add connections (one line trace per bone segment)
        for connection in self.hand_connections:
            start_idx, end_idx = connection
            fig.add_trace(go.Scatter3d(
                x=[x_coords[start_idx], x_coords[end_idx]],
                y=[y_coords[start_idx], y_coords[end_idx]],
                z=[z_coords[start_idx], z_coords[end_idx]],
                mode='lines',
                line=dict(color='rgba(100, 100, 100, 0.6)', width=3),
                showlegend=False
            ))

        # Update layout
        fig.update_layout(
            title=f"3D Hand Landmarks - {hand_landmarks['label']} Hand",
            scene=dict(
                xaxis_title="X",
                yaxis_title="Y",
                zaxis_title="Z (Depth)",
                camera=dict(
                    eye=dict(x=1.5, y=1.5, z=1.5)
                )
            ),
            width=600,
            height=500
        )

        return fig

    def create_gesture_feature_radar(self, gesture_features: Dict[str, float]) -> go.Figure:
        """
        Create a radar chart for gesture features.

        Args:
            gesture_features: Dictionary of gesture features; missing keys
                default to 0. Angle features are expected in degrees.

        Returns:
            Plotly radar chart figure
        """
        # Axis labels for the radar chart
        features = ['Thumb Ext.', 'Index Ext.', 'Middle Ext.', 'Ring Ext.', 'Pinky Ext.',
                   'Thumb-Index Angle', 'Palm Orientation', 'Hand Openness']

        # Extract and normalize values into [0, 1] (angles divided by 180)
        values = [
            gesture_features.get('thumb_extended', 0),
            gesture_features.get('index_extended', 0),
            gesture_features.get('middle_extended', 0),
            gesture_features.get('ring_extended', 0),
            gesture_features.get('pinky_extended', 0),
            gesture_features.get('thumb_index_angle', 0) / 180,  # Normalize angle
            gesture_features.get('palm_orientation', 0) / 180,  # Normalize angle
            gesture_features.get('hand_openness', 0)
        ]

        fig = go.Figure()

        fig.add_trace(go.Scatterpolar(
            r=values,
            theta=features,
            fill='toself',
            name='Gesture Features',
            line_color='rgb(46, 134, 171)'
        ))

        fig.update_layout(
            polar=dict(
                radialaxis=dict(
                    visible=True,
                    range=[0, 1]
                )
            ),
            title="Gesture Feature Analysis",
            showlegend=True
        )

        return fig

    def create_confidence_gauge(self, confidence: float, title: str = "Confidence") -> go.Figure:
        """
        Create a gauge chart for confidence scores.

        Args:
            confidence: Confidence value (0-1); displayed as a percentage
            title: Title for the gauge

        Returns:
            Plotly gauge figure
        """
        fig = go.Figure(go.Indicator(
            mode="gauge+number+delta",
            value=confidence * 100,
            domain={'x': [0, 1], 'y': [0, 1]},
            title={'text': title},
            delta={'reference': 80},  # delta arrow relative to an 80% baseline
            gauge={
                'axis': {'range': [None, 100]},
                'bar': {'color': "darkblue"},
                'steps': [
                    {'range': [0, 50], 'color': "lightgray"},
                    {'range': [50, 80], 'color': "gray"}
                ],
                'threshold': {
                    'line': {'color': "red", 'width': 4},
                    'thickness': 0.75,
                    'value': 90
                }
            }
        ))

        fig.update_layout(height=300)
        return fig
271
+
272
+
273
def create_comparison_view(original_image: np.ndarray,
                          annotated_image: np.ndarray) -> np.ndarray:
    """
    Create a side-by-side comparison view of original and annotated images.

    Both images are rescaled (preserving aspect ratio) to the taller of the
    two heights, stacked horizontally, and labeled in the top-left corner
    of each panel.

    Args:
        original_image: Original input image.
        annotated_image: Image with landmarks drawn.

    Returns:
        Combined comparison image.
    """
    target_height = max(original_image.shape[0], annotated_image.shape[0])

    def _scale_to_height(image: np.ndarray) -> np.ndarray:
        # Resize while keeping aspect ratio so both panels share one height.
        if image.shape[0] == target_height:
            return image
        aspect_ratio = image.shape[1] / image.shape[0]
        scaled_width = int(target_height * aspect_ratio)
        return cv2.resize(image, (scaled_width, target_height))

    original_image = _scale_to_height(original_image)
    annotated_image = _scale_to_height(annotated_image)

    # Stitch the two panels left-to-right.
    comparison = np.hstack([original_image, annotated_image])

    # Caption each panel; the second label is offset by the first panel width.
    cv2.putText(comparison, "Original", (10, 30),
               cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
    cv2.putText(comparison, "Detected", (original_image.shape[1] + 10, 30),
               cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

    return comparison
309
+
310
+
311
def create_processing_timeline(frame_detections: List[Dict[str, Any]]) -> go.Figure:
    """
    Create a timeline visualization for video processing results.

    Plots the number of hands detected per frame against the frame
    timestamp, with markers colored by the hand count.

    Args:
        frame_detections: List of frame detection results; each entry must
            provide 'timestamp', 'hands_detected', and 'frame_number'.

    Returns:
        Plotly timeline figure (empty figure when no detections are given).
    """
    # Nothing to plot — return a blank figure rather than failing.
    if not frame_detections:
        return go.Figure()

    times = [entry['timestamp'] for entry in frame_detections]
    hand_counts = [entry['hands_detected'] for entry in frame_detections]
    frame_ids = [entry['frame_number'] for entry in frame_detections]

    # Marker color tracks the hand count so spikes stand out visually.
    marker_style = dict(
        size=8,
        color=hand_counts,
        colorscale='Viridis',
        showscale=True,
        colorbar=dict(title="Hands")
    )

    timeline_trace = go.Scatter(
        x=times,
        y=hand_counts,
        mode='markers+lines',
        name='Hands Detected',
        marker=marker_style,
        text=[f"Frame {fid}" for fid in frame_ids],
        hovertemplate="<b>Frame %{text}</b><br>" +
                     "Time: %{x:.1f}s<br>" +
                     "Hands: %{y}<br>" +
                     "<extra></extra>"
    )

    fig = go.Figure()
    fig.add_trace(timeline_trace)
    fig.update_layout(
        title="Hand Detection Timeline",
        xaxis_title="Time (seconds)",
        yaxis_title="Number of Hands Detected",
        hovermode='closest'
    )

    return fig