Upload 24 files
Browse files- src/src/__init__.py +1 -0
- src/src/__pycache__/__init__.cpython-310.pyc +0 -0
- src/src/__pycache__/camera_handler.cpython-310.pyc +0 -0
- src/src/__pycache__/export_utils.cpython-310.pyc +0 -0
- src/src/__pycache__/fallback_classifier.cpython-310.pyc +0 -0
- src/src/__pycache__/file_handler.cpython-310.pyc +0 -0
- src/src/__pycache__/gemini_classifier.cpython-310.pyc +0 -0
- src/src/__pycache__/gesture_extractor.cpython-310.pyc +0 -0
- src/src/__pycache__/hand_detector.cpython-310.pyc +0 -0
- src/src/__pycache__/openai_classifier.cpython-310.pyc +0 -0
- src/src/__pycache__/output_handler.cpython-310.pyc +0 -0
- src/src/__pycache__/prediction_logger.cpython-310.pyc +0 -0
- src/src/__pycache__/visualization_utils.cpython-310.pyc +0 -0
- src/src/camera_handler.py +306 -0
- src/src/export_utils.py +418 -0
- src/src/fallback_classifier.py +303 -0
- src/src/file_handler.py +543 -0
- src/src/gemini_classifier.py +420 -0
- src/src/gesture_extractor.py +270 -0
- src/src/hand_detector.py +196 -0
- src/src/openai_classifier.py +392 -0
- src/src/output_handler.py +391 -0
- src/src/prediction_logger.py +294 -0
- src/src/visualization_utils.py +359 -0
src/src/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Sign Language Detector Package
|
src/src/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (163 Bytes). View file
|
|
|
src/src/__pycache__/camera_handler.cpython-310.pyc
ADDED
|
Binary file (8.47 kB). View file
|
|
|
src/src/__pycache__/export_utils.cpython-310.pyc
ADDED
|
Binary file (10.3 kB). View file
|
|
|
src/src/__pycache__/fallback_classifier.cpython-310.pyc
ADDED
|
Binary file (7.43 kB). View file
|
|
|
src/src/__pycache__/file_handler.cpython-310.pyc
ADDED
|
Binary file (13.6 kB). View file
|
|
|
src/src/__pycache__/gemini_classifier.cpython-310.pyc
ADDED
|
Binary file (10.4 kB). View file
|
|
|
src/src/__pycache__/gesture_extractor.cpython-310.pyc
ADDED
|
Binary file (7.4 kB). View file
|
|
|
src/src/__pycache__/hand_detector.cpython-310.pyc
ADDED
|
Binary file (5.89 kB). View file
|
|
|
src/src/__pycache__/openai_classifier.cpython-310.pyc
ADDED
|
Binary file (11 kB). View file
|
|
|
src/src/__pycache__/output_handler.cpython-310.pyc
ADDED
|
Binary file (10.7 kB). View file
|
|
|
src/src/__pycache__/prediction_logger.cpython-310.pyc
ADDED
|
Binary file (9.71 kB). View file
|
|
|
src/src/__pycache__/visualization_utils.cpython-310.pyc
ADDED
|
Binary file (9.67 kB). View file
|
|
|
src/src/camera_handler.py
ADDED
|
@@ -0,0 +1,306 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Real-time Camera Input Handler for Sign Language Detection
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import cv2
|
| 6 |
+
import numpy as np
|
| 7 |
+
import time
|
| 8 |
+
import threading
|
| 9 |
+
from typing import Optional, Callable, Dict, Any, List
|
| 10 |
+
from queue import Queue, Empty
|
| 11 |
+
|
| 12 |
+
from .hand_detector import HandDetector
|
| 13 |
+
from .gesture_extractor import GestureExtractor
|
| 14 |
+
from .openai_classifier import SignLanguageClassifier
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class CameraHandler:
|
| 18 |
+
"""
|
| 19 |
+
Handles real-time camera input for sign language detection.
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
def __init__(self,
             camera_index: int = 0,
             frame_width: int = 640,
             frame_height: int = 480,
             fps: int = 30,
             detection_interval: float = 2.0):
    """Set up camera parameters, processing components, and threading state.

    Args:
        camera_index: Index of the camera device to open.
        frame_width: Requested capture frame width in pixels.
        frame_height: Requested capture frame height in pixels.
        fps: Requested capture frame rate.
        detection_interval: Minimum seconds between gesture classifications.
    """
    # Camera configuration
    self.camera_index = camera_index
    self.frame_width = frame_width
    self.frame_height = frame_height
    self.fps = fps
    self.detection_interval = detection_interval

    # Processing components; the classifier is created lazily via
    # initialize_classifier() because it may need an API key.
    self.hand_detector = HandDetector()
    self.gesture_extractor = GestureExtractor()
    self.classifier = None

    # Capture device handle and worker threads (created in start_capture)
    self.cap = None
    self.is_running = False
    self.capture_thread = None
    self.detection_thread = None

    # Bounded queues decouple the capture thread from classification
    self.frame_queue = Queue(maxsize=10)
    self.detection_queue = Queue(maxsize=5)

    # Optional user callbacks (see set_callbacks)
    self.on_frame_callback = None
    self.on_detection_callback = None

    # Throttling state and a rolling record of recent classifications
    self.last_detection_time = 0
    self.gesture_history = []
    self.max_history_length = 10
| 67 |
+
|
| 68 |
+
def initialize_camera(self) -> bool:
    """Open the camera device and apply the configured properties.

    Returns:
        True if the camera opened successfully, False otherwise.
    """
    try:
        self.cap = cv2.VideoCapture(self.camera_index)
        if not self.cap.isOpened():
            print(f"Error: Could not open camera {self.camera_index}")
            return False

        # Request the configured resolution and frame rate; OpenCV treats
        # these as hints, the driver may pick the nearest supported values.
        for prop, value in ((cv2.CAP_PROP_FRAME_WIDTH, self.frame_width),
                            (cv2.CAP_PROP_FRAME_HEIGHT, self.frame_height),
                            (cv2.CAP_PROP_FPS, self.fps)):
            self.cap.set(prop, value)

        print(f"Camera initialized: {self.frame_width}x{self.frame_height} @ {self.fps}fps")
        return True

    except Exception as e:
        print(f"Error initializing camera: {e}")
        return False
| 93 |
+
|
| 94 |
+
def initialize_classifier(self, api_key: Optional[str] = None) -> bool:
    """Create the OpenAI-backed gesture classifier.

    Args:
        api_key: OpenAI API key; when None the classifier falls back to its
            own key resolution (presumably an environment variable — confirm
            in SignLanguageClassifier).

    Returns:
        True if the classifier was created, False otherwise.
    """
    try:
        self.classifier = SignLanguageClassifier(api_key=api_key)
        print("OpenAI classifier initialized")
        return True
    except Exception as e:
        print(f"Error initializing classifier: {e}")
        return False
| 111 |
+
|
| 112 |
+
def set_callbacks(self,
                  on_frame: Optional[Callable] = None,
                  on_detection: Optional[Callable] = None):
    """Register observers for frame and detection events.

    Args:
        on_frame: Called with (annotated_frame, hand_landmarks) for every
            captured frame.
        on_detection: Called with a list of detection dicts whenever
            gestures are classified.
    """
    self.on_frame_callback = on_frame
    self.on_detection_callback = on_detection
| 124 |
+
|
| 125 |
+
def start_capture(self) -> bool:
    """Spawn the capture and detection worker threads.

    Returns:
        True if both threads were started, False if the camera is not ready.
    """
    if not (self.cap and self.cap.isOpened()):
        print("Camera not initialized")
        return False

    self.is_running = True

    # Daemon threads so the interpreter can exit even if they are alive.
    self.capture_thread = threading.Thread(target=self._capture_loop, daemon=True)
    self.detection_thread = threading.Thread(target=self._detection_loop, daemon=True)
    self.capture_thread.start()
    self.detection_thread.start()

    print("Camera capture started")
    return True
| 148 |
+
|
| 149 |
+
def stop_capture(self):
    """Stop the worker threads and release the camera.

    Signals both loops to exit, waits a bounded time for each, then
    releases the capture device. The handle is cleared afterwards so later
    calls (start_capture, capture_single_frame) fail cleanly instead of
    operating on a released VideoCapture object.
    """
    self.is_running = False

    # Bounded joins: don't hang shutdown if a worker is stuck in a read.
    if self.capture_thread:
        self.capture_thread.join(timeout=2.0)
    if self.detection_thread:
        self.detection_thread.join(timeout=2.0)

    if self.cap:
        self.cap.release()
        self.cap = None  # fix: don't keep a stale, released handle around

    print("Camera capture stopped")
| 163 |
+
|
| 164 |
+
def _capture_loop(self):
    """Camera capture loop (runs in a worker thread).

    Reads frames, mirrors them, runs hand detection, feeds the frame queue
    consumed by _detection_loop, and notifies the frame callback.
    """
    while self.is_running:
        ret, frame = self.cap.read()

        if not ret:
            print("Error reading frame from camera")
            # Fix: previously only this thread exited on a read failure,
            # leaving is_running True and _detection_loop polling forever.
            self.is_running = False
            break

        # Mirror the image so on-screen movement matches the user's
        frame = cv2.flip(frame, 1)

        annotated_frame, hand_landmarks = self.hand_detector.detect_hands(frame)

        # Drop frames when the classifier can't keep up (queue full);
        # real-time preview matters more than classifying every frame.
        if not self.frame_queue.full():
            self.frame_queue.put((frame.copy(), hand_landmarks))

        if self.on_frame_callback:
            self.on_frame_callback(annotated_frame, hand_landmarks)

        # Crude pacing; NOTE(review): does not subtract processing time,
        # so the effective rate is below self.fps.
        time.sleep(1.0 / self.fps)
| 189 |
+
|
| 190 |
+
def _detection_loop(self):
    """Gesture classification loop (runs in a worker thread).

    Pulls frames from the queue and classifies at most once per
    detection_interval; frames arriving sooner are simply discarded.
    """
    while self.is_running:
        try:
            frame, hand_landmarks = self.frame_queue.get(timeout=1.0)

            now = time.time()
            if now - self.last_detection_time < self.detection_interval:
                continue  # throttle: too soon since the last classification

            if hand_landmarks and self.classifier:
                self._process_gestures(hand_landmarks)
                self.last_detection_time = now

        except Empty:
            # Queue was quiet for a second; loop back and re-check is_running.
            continue
        except Exception as e:
            print(f"Error in detection loop: {e}")
| 211 |
+
|
| 212 |
+
def _process_gestures(self, hand_landmarks: List[Dict[str, Any]]):
    """Classify each detected hand and record successful results.

    Args:
        hand_landmarks: Per-hand landmark dicts from the hand detector
            (each expected to carry a 'label' key).
    """
    detections = []

    for hand_data in hand_landmarks:
        try:
            description = self.gesture_extractor.create_gesture_description(hand_data)
            classification = self.classifier.classify_gesture(description)

            if not classification['success']:
                continue

            detection = {
                'hand_label': hand_data['label'],
                'gesture_description': description,
                'classification': classification,
                'timestamp': time.time(),
            }
            detections.append(detection)

            # Keep a bounded rolling history of recent detections
            self.gesture_history.append(detection)
            del self.gesture_history[:-self.max_history_length]

        except Exception as e:
            print(f"Error processing gesture: {e}")

    if detections and self.on_detection_callback:
        self.on_detection_callback(detections)
| 249 |
+
|
| 250 |
+
def get_recent_gestures(self, count: int = 5) -> List[Dict[str, Any]]:
    """Return up to ``count`` most recent gesture detections, oldest first.

    Args:
        count: Maximum number of detections to return.

    Returns:
        The trailing slice of the gesture history ([] when empty).
    """
    # Slicing an empty list already yields [], so no emptiness guard needed.
    return self.gesture_history[-count:]
| 261 |
+
|
| 262 |
+
def classify_gesture_sequence(self, count: int = 5) -> Optional[Dict[str, Any]]:
    """Classify the most recent gestures as a sequence.

    Args:
        count: How many recent gestures to include in the sequence.

    Returns:
        The sequence classification dict, or None when there is no
        classifier, fewer than two recorded gestures, or an error occurs.
    """
    if not self.classifier or len(self.gesture_history) < 2:
        return None

    descriptions = [g['gesture_description']
                    for g in self.get_recent_gestures(count)]

    try:
        return self.classifier.classify_sequence(descriptions)
    except Exception as e:
        print(f"Error classifying gesture sequence: {e}")
        return None
| 283 |
+
|
| 284 |
+
def capture_single_frame(self) -> Optional[np.ndarray]:
    """Grab one mirrored frame from the camera.

    Returns:
        The horizontally flipped frame, or None if the camera is not open
        or the read fails.
    """
    if not self.cap or not self.cap.isOpened():
        return None

    ok, frame = self.cap.read()
    if not ok:
        return None
    return cv2.flip(frame, 1)  # mirror to match the live preview
| 298 |
+
|
| 299 |
+
def cleanup(self):
    """Release all resources: worker threads, camera, detector, windows."""
    self.stop_capture()

    if self.hand_detector:
        self.hand_detector.cleanup()

    # Close any OpenCV preview windows this process opened.
    cv2.destroyAllWindows()
|
src/src/export_utils.py
ADDED
|
@@ -0,0 +1,418 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Export utilities for sign language detection results
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import json
|
| 6 |
+
import csv
|
| 7 |
+
import os
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
from typing import List, Dict, Any, Optional
|
| 10 |
+
import pandas as pd
|
| 11 |
+
from reportlab.lib.pagesizes import letter, A4
|
| 12 |
+
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image as RLImage
|
| 13 |
+
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
| 14 |
+
from reportlab.lib.units import inch
|
| 15 |
+
from reportlab.lib import colors
|
| 16 |
+
from reportlab.lib.enums import TA_CENTER, TA_LEFT
|
| 17 |
+
import tempfile
|
| 18 |
+
import cv2
|
| 19 |
+
import numpy as np
|
| 20 |
+
from PIL import Image
|
| 21 |
+
import io
|
| 22 |
+
import base64
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class ResultExporter:
|
| 26 |
+
"""
|
| 27 |
+
Export sign language detection results in various formats.
|
| 28 |
+
"""
|
| 29 |
+
|
| 30 |
+
def __init__(self):
    """Prepare the ReportLab stylesheets used when rendering PDF reports."""
    # Stock styles ('Normal', 'Title', ...) plus our derived custom ones.
    self.styles = getSampleStyleSheet()
    self.custom_styles = self._create_custom_styles()
| 34 |
+
|
| 35 |
+
def _create_custom_styles(self) -> Dict[str, ParagraphStyle]:
    """Build the named ParagraphStyles used in PDF reports.

    Returns:
        Mapping of style name -> ParagraphStyle (title, heading, subheading).
    """
    # Declarative spec keeps the three styles easy to compare at a glance.
    specs = {
        'CustomTitle': dict(parent=self.styles['Title'], fontSize=24,
                            spaceAfter=30, alignment=TA_CENTER,
                            textColor=colors.darkblue),
        'CustomHeading': dict(parent=self.styles['Heading1'], fontSize=16,
                              spaceAfter=12, spaceBefore=20,
                              textColor=colors.darkblue),
        'CustomSubheading': dict(parent=self.styles['Heading2'], fontSize=14,
                                 spaceAfter=8, spaceBefore=12,
                                 textColor=colors.darkgreen),
    }
    return {name: ParagraphStyle(name, **opts) for name, opts in specs.items()}
| 70 |
+
|
| 71 |
+
def export_to_json(self, results: List[Dict[str, Any]],
                   output_path: str,
                   include_metadata: bool = True) -> bool:
    """Write processing results to a JSON file.

    Args:
        results: Per-file processing result dicts.
        output_path: Destination path for the JSON document.
        include_metadata: When False, image payloads are stripped from each
            result to keep the file small.

    Returns:
        True on success, False on any error.
    """
    image_keys = ('annotated_image', 'enhanced_image',
                  'comparison_image', 'original_image')
    try:
        cleaned = []
        for result in results:
            entry = self._clean_result_for_export(result)
            if not include_metadata:
                # Drop the bulky (base64-encoded) image fields.
                for key in image_keys:
                    entry.pop(key, None)
            cleaned.append(entry)

        payload = {
            'export_timestamp': datetime.now().isoformat(),
            'total_files': len(results),
            'successful_files': sum(1 for r in results if r.get('success', False)),
            'results': cleaned,
        }

        with open(output_path, 'w', encoding='utf-8') as f:
            # default=str is a deliberate catch-all for leftover
            # non-serializable values (e.g. datetimes).
            json.dump(payload, f, indent=2, default=str, ensure_ascii=False)
        return True

    except Exception as e:
        print(f"Error exporting to JSON: {e}")
        return False
| 114 |
+
|
| 115 |
+
def export_to_csv(self, results: List[Dict[str, Any]], output_path: str) -> bool:
    """Write one CSV row per detection (or one per file when none).

    Every row now carries the same column set so the CSV is rectangular;
    previously rows for failed or detection-less files omitted
    'gesture_description', leaving NaN-filled cells in the DataFrame.

    Args:
        results: Per-file processing result dicts.
        output_path: Destination path for the CSV file.

    Returns:
        True if at least one row was written, False otherwise (including
        on error).
    """
    def base_row(result: Dict[str, Any], success: bool, error: str = '') -> Dict[str, Any]:
        # Single template so every row shares an identical column set.
        return {
            'filename': result.get('filename', ''),
            'file_type': result.get('file_type', ''),
            'success': success,
            'error': error,
            'hands_detected': 0,
            'hand_label': '',
            'confidence': 0,
            'gesture_description': '',
            'letter': '',
            'word': '',
            'ai_confidence': 0,
        }

    try:
        csv_data = []
        for result in results:
            if not result.get('success'):
                csv_data.append(base_row(result, False, result.get('error', '')))
                continue

            detections = result.get('detections')
            if not detections:
                csv_data.append(base_row(result, True))
                continue

            for detection in detections:
                row = base_row(result, True)
                row['hands_detected'] = result.get('hands_detected', 0)
                row['hand_label'] = detection.get('hand_label', '')
                row['confidence'] = detection.get('confidence', 0)
                row['gesture_description'] = detection.get('gesture_description', '')

                classification = detection.get('classification', {})
                if classification.get('success'):
                    row['letter'] = classification.get('letter', '')
                    row['word'] = classification.get('word', '')
                    row['ai_confidence'] = classification.get('confidence', 0)

                csv_data.append(row)

        if csv_data:
            pd.DataFrame(csv_data).to_csv(output_path, index=False)
            return True
        return False

    except Exception as e:
        print(f"Error exporting to CSV: {e}")
        return False
| 200 |
+
|
| 201 |
+
def export_to_pdf(self, results: List[Dict[str, Any]],
                  output_path: str,
                  include_images: bool = True) -> bool:
    """Render a PDF report of the processing results.

    Args:
        results: Per-file processing result dicts.
        output_path: Destination path for the PDF file.
        include_images: Reserved flag; images are not currently embedded
            in the report.

    Returns:
        True on success, False on any error.
    """
    try:
        doc = SimpleDocTemplate(output_path, pagesize=A4)
        story = []

        # Report title
        story.append(Paragraph("Sign Language Detection Report",
                               self.custom_styles['CustomTitle']))
        story.append(Spacer(1, 20))

        # Overall summary block
        successful_files = sum(1 for r in results if r.get('success', False))
        total_hands = sum(r.get('hands_detected', 0)
                          for r in results if r.get('success', False))
        summary_text = f"""
        <b>Processing Summary</b><br/>
        Total Files: {len(results)}<br/>
        Successful: {successful_files}<br/>
        Total Hands Detected: {total_hands}<br/>
        Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
        """
        story.append(Paragraph(summary_text, self.styles['Normal']))
        story.append(Spacer(1, 20))

        # One section per processed file
        for i, result in enumerate(results):
            filename = result.get('filename', f'File {i+1}')
            # Fix: the header previously never interpolated `filename`
            # and printed a literal placeholder for every file.
            story.append(Paragraph(f"File: {filename}",
                                   self.custom_styles['CustomHeading']))

            if not result.get('success'):
                error_text = f"<font color='red'>Error: {result.get('error', 'Unknown error')}</font>"
                story.append(Paragraph(error_text, self.styles['Normal']))
                story.append(Spacer(1, 10))
                continue

            # Per-file property table
            file_info = [
                ['Property', 'Value'],
                ['File Type', result.get('file_type', 'Unknown')],
                ['File Size', f"{result.get('file_size', 0) / 1024:.1f} KB"],
                ['Hands Detected', str(result.get('hands_detected', 0))]
            ]
            if result.get('file_type') == 'video':
                video_props = result.get('video_properties', {})
                file_info.extend([
                    ['Duration', f"{video_props.get('duration', 0):.1f}s"],
                    ['FPS', f"{video_props.get('fps', 0):.1f}"],
                    ['Total Frames', str(video_props.get('total_frames', 0))]
                ])

            info_table = Table(file_info)
            info_table.setStyle(TableStyle([
                ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
                ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
                ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                ('FONTSIZE', (0, 0), (-1, 0), 12),
                ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
                ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
                ('GRID', (0, 0), (-1, -1), 1, colors.black)
            ]))
            story.append(info_table)
            story.append(Spacer(1, 15))

            # Per-hand detection details
            if result.get('detections'):
                story.append(Paragraph("Detection Details",
                                       self.custom_styles['CustomSubheading']))

                for j, detection in enumerate(result['detections']):
                    detection_text = f"""
                    <b>Hand {j+1}: {detection.get('hand_label', 'Unknown')}</b><br/>
                    Confidence: {detection.get('confidence', 0):.1%}<br/>
                    """
                    classification = detection.get('classification', {})
                    if classification.get('success'):
                        if classification.get('letter'):
                            detection_text += f"Letter: <b>{classification['letter']}</b><br/>"
                        if classification.get('word'):
                            detection_text += f"Word: <b>{classification['word']}</b><br/>"
                        if classification.get('confidence'):
                            detection_text += f"AI Confidence: {classification['confidence']:.1%}<br/>"

                    story.append(Paragraph(detection_text, self.styles['Normal']))
                    story.append(Spacer(1, 10))

            story.append(Spacer(1, 20))

        doc.build(story)
        return True

    except Exception as e:
        print(f"Error exporting to PDF: {e}")
        return False
| 318 |
+
|
| 319 |
+
def _clean_result_for_export(self, result: Dict[str, Any]) -> Dict[str, Any]:
    """Return a JSON-serializable copy of a result dict.

    Known image arrays become base64 PNG data URIs, any other numpy array
    becomes a plain list, and every other value passes through unchanged.

    Args:
        result: Raw result dict, possibly containing numpy arrays.

    Returns:
        A new dict safe to pass to json.dump.
    """
    image_keys = {'annotated_image', 'enhanced_image',
                  'comparison_image', 'original_image'}
    clean_result = {}

    for key, value in result.items():
        if not isinstance(value, np.ndarray):
            # lists/dicts and scalars need no conversion (previously two
            # redundant branches did the same pass-through).
            clean_result[key] = value
            continue

        if key not in image_keys:
            clean_result[key] = value.tolist()
            continue

        try:
            # OpenCV images are BGR; convert so the PNG colors are correct.
            if len(value.shape) == 3:
                pil_image = Image.fromarray(cv2.cvtColor(value, cv2.COLOR_BGR2RGB))
            else:
                pil_image = Image.fromarray(value)

            buffer = io.BytesIO()
            pil_image.save(buffer, format='PNG')
            encoded = base64.b64encode(buffer.getvalue()).decode()
            clean_result[key] = f"data:image/png;base64,{encoded}"
        except Exception:
            # Fix: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit.
            clean_result[key] = None

    return clean_result
| 358 |
+
|
| 359 |
+
def create_summary_report(self, results: List[Dict[str, Any]]) -> Dict[str, Any]:
|
| 360 |
+
"""
|
| 361 |
+
Create a summary report of the processing results.
|
| 362 |
+
|
| 363 |
+
Args:
|
| 364 |
+
results: List of processing results
|
| 365 |
+
|
| 366 |
+
Returns:
|
| 367 |
+
Summary report dictionary
|
| 368 |
+
"""
|
| 369 |
+
summary = {
|
| 370 |
+
'total_files': len(results),
|
| 371 |
+
'successful_files': 0,
|
| 372 |
+
'failed_files': 0,
|
| 373 |
+
'total_hands_detected': 0,
|
| 374 |
+
'file_types': {},
|
| 375 |
+
'detected_letters': {},
|
| 376 |
+
'detected_words': {},
|
| 377 |
+
'average_confidence': 0,
|
| 378 |
+
'processing_errors': []
|
| 379 |
+
}
|
| 380 |
+
|
| 381 |
+
confidences = []
|
| 382 |
+
|
| 383 |
+
for result in results:
|
| 384 |
+
if result.get('success'):
|
| 385 |
+
summary['successful_files'] += 1
|
| 386 |
+
summary['total_hands_detected'] += result.get('hands_detected', 0)
|
| 387 |
+
|
| 388 |
+
# File type statistics
|
| 389 |
+
file_type = result.get('file_type', 'unknown')
|
| 390 |
+
summary['file_types'][file_type] = summary['file_types'].get(file_type, 0) + 1
|
| 391 |
+
|
| 392 |
+
# Process detections
|
| 393 |
+
for detection in result.get('detections', []):
|
| 394 |
+
if 'confidence' in detection:
|
| 395 |
+
confidences.append(detection['confidence'])
|
| 396 |
+
|
| 397 |
+
if 'classification' in detection and detection['classification'].get('success'):
|
| 398 |
+
classification = detection['classification']
|
| 399 |
+
|
| 400 |
+
if classification.get('letter'):
|
| 401 |
+
letter = classification['letter']
|
| 402 |
+
summary['detected_letters'][letter] = summary['detected_letters'].get(letter, 0) + 1
|
| 403 |
+
|
| 404 |
+
if classification.get('word'):
|
| 405 |
+
word = classification['word']
|
| 406 |
+
summary['detected_words'][word] = summary['detected_words'].get(word, 0) + 1
|
| 407 |
+
else:
|
| 408 |
+
summary['failed_files'] += 1
|
| 409 |
+
summary['processing_errors'].append({
|
| 410 |
+
'filename': result.get('filename', 'unknown'),
|
| 411 |
+
'error': result.get('error', 'unknown error')
|
| 412 |
+
})
|
| 413 |
+
|
| 414 |
+
# Calculate average confidence
|
| 415 |
+
if confidences:
|
| 416 |
+
summary['average_confidence'] = sum(confidences) / len(confidences)
|
| 417 |
+
|
| 418 |
+
return summary
|
src/src/fallback_classifier.py
ADDED
|
@@ -0,0 +1,303 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Fallback Sign Language Classifier
|
| 3 |
+
|
| 4 |
+
This module provides basic sign language classification without requiring OpenAI API.
|
| 5 |
+
It uses rule-based pattern matching to identify common ASL letters and gestures.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from typing import Dict, Any, Optional
|
| 9 |
+
import re
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class FallbackSignLanguageClassifier:
    """
    Fallback classifier for basic ASL recognition using pattern matching.

    Used when no AI API (Gemini/OpenAI) is available: it parses the
    textual gesture description produced upstream and maps finger
    configurations to ASL letters, numbers, and common words via fixed
    rules.
    """

    # Finger names in thumb-to-pinky order; also fixes the order of the
    # lists returned by the extraction helpers.
    _FINGER_NAMES = ('thumb', 'index', 'middle', 'ring', 'pinky')

    # (phrase to search for, pattern tag) pairs recognized by _extract_patterns.
    _PATTERN_PHRASES = (
        ('closed fist', 'closed_fist'),
        ('open hand', 'open_hand'),
        ('pointing gesture', 'pointing'),
        ('pinch gesture', 'pinch'),
    )

    def __init__(self):
        """Initialize the fallback classifier."""
        self.debug = True  # when True, classification steps are printed to stdout
        print("Fallback classifier initialized (no API required)")

    def classify_gesture(self, gesture_description: str,
                         sign_language: str = "ASL",
                         context: Optional[str] = None) -> Dict[str, Any]:
        """
        Classify a gesture using rule-based pattern matching.

        Args:
            gesture_description: Description of the hand gesture
            sign_language: Sign language type (default: ASL; only ASL rules
                are implemented)
            context: Additional context (optional; unused by the rules)

        Returns:
            Classification result dictionary with 'success', 'letter',
            'word', 'confidence', 'description', and 'method' keys
        """
        if self.debug:
            print(f"\n=== Fallback Classification Debug ===")
            print(f"Input: {gesture_description}")

        try:
            # Analyze the gesture description
            result = self._analyze_gesture_patterns(gesture_description)
            result['success'] = True
            result['method'] = 'fallback_pattern_matching'

            if self.debug:
                print(f"Result: {result}")
                print("=== End Fallback Debug ===\n")

            return result

        except Exception as e:
            if self.debug:
                print(f"Fallback classification error: {str(e)}")
                print("=== End Fallback Debug ===\n")

            return {
                'success': False,
                'error': str(e),
                'letter': None,
                'word': None,
                'confidence': 0.0,
                'description': 'Fallback classification failed',
                'method': 'fallback_pattern_matching'
            }

    def _analyze_gesture_patterns(self, description: str) -> Dict[str, Any]:
        """
        Analyze a gesture description using the pattern-matching rules.

        Args:
            description: Gesture description string

        Returns:
            Classification result (without 'success'/'method', which
            classify_gesture adds)
        """
        desc_lower = description.lower()

        # Extract key information from the structured description text.
        extended_fingers = self._extract_extended_fingers(desc_lower)
        closed_fingers = self._extract_closed_fingers(desc_lower)
        patterns = self._extract_patterns(desc_lower)

        # Rule-based classification
        letter, word, confidence, explanation = self._apply_classification_rules(
            extended_fingers, closed_fingers, patterns, desc_lower
        )

        return {
            'letter': letter,
            'word': word,
            'confidence': confidence,
            'description': explanation,
            'extended_fingers': extended_fingers,
            'closed_fingers': closed_fingers,
            'patterns': patterns
        }

    def _extract_fingers(self, description: str, section: str) -> list:
        """
        Extract finger names listed after a section label.

        Shared implementation for the extended/closed extractors (the
        originals were copy-paste duplicates).  The section's text runs
        until the next ';' or, failing that, the end of the description.

        Args:
            description: Lower-cased gesture description
            section: Section label to look for, e.g. 'extended fingers:'

        Returns:
            Finger names found in that section, in thumb-to-pinky order
        """
        if section not in description:
            return []

        start = description.find(section) + len(section)
        end = description.find(';', start)
        if end == -1:
            end = len(description)
        fingers_text = description[start:end].strip()

        return [finger for finger in self._FINGER_NAMES if finger in fingers_text]

    def _extract_extended_fingers(self, description: str) -> list:
        """Extract extended fingers from description."""
        return self._extract_fingers(description, 'extended fingers:')

    def _extract_closed_fingers(self, description: str) -> list:
        """Extract closed fingers from description."""
        return self._extract_fingers(description, 'closed fingers:')

    def _extract_patterns(self, description: str) -> list:
        """Extract high-level gesture pattern tags from description."""
        return [tag for phrase, tag in self._PATTERN_PHRASES if phrase in description]

    def _apply_classification_rules(self, extended: list, closed: list,
                                    patterns: list, description: str) -> tuple:
        """
        Apply the ASL-specific classification rules, most specific first.

        Args:
            extended: Names of extended fingers
            closed: Names of closed fingers
            patterns: Pattern tags from _extract_patterns
            description: Full lower-cased description (unused by current rules)

        Returns:
            (letter, word, confidence, explanation); letter/word may each
            be None
        """
        # Rule 1: Single finger extended
        if len(extended) == 1:
            if 'index' in extended:
                return '1', None, 0.9, "Index finger only = Number 1"
            elif 'pinky' in extended:
                return None, 'I', 0.9, "Pinky finger only = Pronoun I"
            elif 'thumb' in extended:
                return None, 'GOOD', 0.8, "Thumb up = GOOD"
            elif 'middle' in extended:
                return None, 'BAD', 0.6, "Middle finger = BAD (rude gesture)"

        # Rule 2: Two fingers extended
        if len(extended) == 2:
            if 'index' in extended and 'middle' in extended:
                return '2', None, 0.9, "Index and middle = Number 2"
            elif 'index' in extended and 'thumb' in extended:
                return 'L', None, 0.8, "Index and thumb = Letter L"
            elif 'index' in extended and 'pinky' in extended:
                return None, 'I LOVE YOU', 0.9, "Index and pinky = I LOVE YOU sign"
            elif 'thumb' in extended and 'pinky' in extended:
                return None, 'CALL', 0.7, "Thumb and pinky = CALL/PHONE"

        # Rule 3: Three fingers extended
        if len(extended) == 3:
            if 'index' in extended and 'middle' in extended and 'ring' in extended:
                return '3', None, 0.9, "Three middle fingers = Number 3"
            elif 'thumb' in extended and 'index' in extended and 'pinky' in extended:
                return None, 'I LOVE YOU', 0.9, "Thumb, index, pinky = I LOVE YOU"

        # Rule 4: Four fingers extended (thumb closed)
        if len(extended) == 4 and 'thumb' in closed:
            return '4', None, 0.9, "Four fingers, thumb closed = Number 4"

        # Rule 5: All five fingers extended
        if len(extended) == 5:
            return '5', None, 0.9, "All fingers extended = Number 5"

        # Rule 6: Closed fist (no fingers extended)
        if len(extended) == 0 or 'closed_fist' in patterns:
            return 'A', None, 0.8, "Closed fist = Letter A"

        # Rule 7: Four fingers (no thumb) extended = HELLO.
        # NOTE(review): unreachable — Rule 4 already returns for every
        # len(extended) == 4 with thumb closed. Kept for behavior parity.
        if (len(extended) == 4 and 'index' in extended and 'middle' in extended
                and 'ring' in extended and 'pinky' in extended and 'thumb' in closed):
            return None, 'HELLO', 0.8, "Four fingers extended = HELLO"

        # Rule 8: Pinch gesture pattern
        if 'pinch' in patterns:
            return 'F', None, 0.7, "Pinch gesture = Letter F"

        # Rule 9: Pointing gesture pattern
        if 'pointing' in patterns:
            if 'index' in extended and len(extended) == 1:
                return '1', None, 0.8, "Pointing with index = Number 1"
            else:
                return None, 'YOU', 0.6, "Pointing gesture = YOU"

        # Rule 10: Open hand pattern
        if 'open_hand' in patterns:
            if len(extended) == 5:
                return '5', None, 0.8, "Open hand = Number 5"
            else:
                return None, 'HELLO', 0.7, "Open hand = HELLO"

        # Default fallback based on finger count with lower confidence.
        # NOTE(review): counts 0 and 5 are unreachable here (Rules 5/6
        # already returned). Kept for behavior parity.
        finger_count = len(extended)
        if finger_count == 0:
            return 'A', None, 0.4, f"No extended fingers, default to A"
        elif finger_count == 1:
            return '1', None, 0.4, f"One finger extended, default to 1"
        elif finger_count == 2:
            return '2', None, 0.4, f"Two fingers extended, default to 2"
        elif finger_count == 3:
            return '3', None, 0.4, f"Three fingers extended, default to 3"
        elif finger_count == 4:
            return '4', None, 0.4, f"Four fingers extended, default to 4"
        elif finger_count == 5:
            return '5', None, 0.4, f"Five fingers extended, default to 5"
        else:
            return None, None, 0.1, "Unable to classify gesture"

    def classify_sequence(self, gesture_descriptions: list,
                          sign_language: str = "ASL") -> Dict[str, Any]:
        """
        Classify a sequence of gestures (fallback implementation).

        Each gesture is classified independently; detected letters are
        joined and checked against a small common-word table when no word
        was detected directly.

        Args:
            gesture_descriptions: List of gesture descriptions
            sign_language: Sign language type

        Returns:
            Sequence classification result with 'word', 'sentence',
            'individual_letters', 'confidence', and 'method' keys
        """
        letters = []
        words = []

        for desc in gesture_descriptions:
            result = self.classify_gesture(desc, sign_language)
            if result.get('success'):
                if result.get('letter'):
                    letters.append(result['letter'])
                if result.get('word'):
                    words.append(result['word'])

        # Try to form words from letters
        if letters and not words:
            letter_sequence = ''.join(letters)
            # Check for common words
            common_words = {
                'HI': 'HI',
                'NO': 'NO',
                'OK': 'OK',
                'YES': 'YES'
            }

            if letter_sequence in common_words:
                words.append(common_words[letter_sequence])

        return {
            'success': True,
            'word': words[0] if words else None,
            'sentence': ' '.join(words) if len(words) > 1 else None,
            'confidence': 0.6,
            'individual_letters': letters,
            'method': 'fallback_sequence_matching'
        }
|
src/src/file_handler.py
ADDED
|
@@ -0,0 +1,543 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
File Input Handler for Sign Language Detection
|
| 3 |
+
Processes video and image files for gesture analysis
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import cv2
|
| 7 |
+
import numpy as np
|
| 8 |
+
import os
|
| 9 |
+
from typing import List, Dict, Any, Optional, Tuple, Generator
|
| 10 |
+
from PIL import Image
|
| 11 |
+
import time
|
| 12 |
+
|
| 13 |
+
from .hand_detector import HandDetector
|
| 14 |
+
from .gesture_extractor import GestureExtractor
|
| 15 |
+
from .openai_classifier import SignLanguageClassifier
|
| 16 |
+
from .gemini_classifier import GeminiSignLanguageClassifier
|
| 17 |
+
from .prediction_logger import PredictionLogger
|
| 18 |
+
from .visualization_utils import HandLandmarkVisualizer, create_comparison_view
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class FileHandler:
|
| 22 |
+
"""
|
| 23 |
+
Handles file input (images and videos) for sign language detection.
|
| 24 |
+
"""
|
| 25 |
+
|
| 26 |
+
    def __init__(self,
                 frame_skip: int = 5,
                 max_frames: int = 100):
        """
        Initialize the FileHandler and its processing pipeline.

        The AI classifier is NOT created here; it is attached later via
        initialize_classifier().

        Args:
            frame_skip: Number of frames to skip between processing (for videos)
            max_frames: Maximum number of frames to process from a video
        """
        self.frame_skip = frame_skip
        self.max_frames = max_frames

        # Initialize components
        self.hand_detector = HandDetector(static_image_mode=True)
        self.gesture_extractor = GestureExtractor()
        # Optional AI classifier; stays None until initialize_classifier() succeeds.
        self.classifier = None
        self.visualizer = HandLandmarkVisualizer()
        self.logger = PredictionLogger(debug=True)

        # Supported file formats (lower-case extensions, with leading dot)
        self.supported_image_formats = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp'}
        self.supported_video_formats = {'.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv'}
|
| 49 |
+
|
| 50 |
+
def initialize_classifier(self, api_key: Optional[str] = None, use_gemini: bool = True) -> bool:
|
| 51 |
+
"""
|
| 52 |
+
Initialize the AI classifier (Gemini or OpenAI).
|
| 53 |
+
|
| 54 |
+
Args:
|
| 55 |
+
api_key: API key (Gemini or OpenAI)
|
| 56 |
+
use_gemini: Whether to use Gemini instead of OpenAI (default: True)
|
| 57 |
+
|
| 58 |
+
Returns:
|
| 59 |
+
True if classifier initialized successfully, False otherwise
|
| 60 |
+
"""
|
| 61 |
+
# Check environment variable for preference
|
| 62 |
+
use_gemini_env = os.getenv('USE_GEMINI', 'True').lower() == 'true'
|
| 63 |
+
use_gemini = use_gemini and use_gemini_env
|
| 64 |
+
|
| 65 |
+
if use_gemini:
|
| 66 |
+
try:
|
| 67 |
+
self.classifier = GeminiSignLanguageClassifier(api_key=api_key)
|
| 68 |
+
print("✅ Gemini AI classifier initialized for file processing")
|
| 69 |
+
return True
|
| 70 |
+
except Exception as e:
|
| 71 |
+
print(f"⚠️ Failed to initialize Gemini classifier: {e}")
|
| 72 |
+
print("🔄 Falling back to OpenAI classifier...")
|
| 73 |
+
|
| 74 |
+
# Fallback to OpenAI
|
| 75 |
+
try:
|
| 76 |
+
self.classifier = SignLanguageClassifier(api_key=api_key)
|
| 77 |
+
print("✅ OpenAI classifier initialized as fallback")
|
| 78 |
+
return True
|
| 79 |
+
except Exception as e2:
|
| 80 |
+
print(f"❌ OpenAI classifier also failed: {e2}")
|
| 81 |
+
print("🔧 Will use pattern-based fallback only")
|
| 82 |
+
return False
|
| 83 |
+
else:
|
| 84 |
+
try:
|
| 85 |
+
self.classifier = SignLanguageClassifier(api_key=api_key)
|
| 86 |
+
print("✅ OpenAI classifier initialized for file processing")
|
| 87 |
+
return True
|
| 88 |
+
except Exception as e:
|
| 89 |
+
print(f"❌ Failed to initialize OpenAI classifier: {e}")
|
| 90 |
+
print("🔧 Will use pattern-based fallback only")
|
| 91 |
+
return False
|
| 92 |
+
|
| 93 |
+
def is_supported_file(self, file_path: str) -> bool:
|
| 94 |
+
"""
|
| 95 |
+
Check if the file format is supported.
|
| 96 |
+
|
| 97 |
+
Args:
|
| 98 |
+
file_path: Path to the file
|
| 99 |
+
|
| 100 |
+
Returns:
|
| 101 |
+
True if file format is supported, False otherwise
|
| 102 |
+
"""
|
| 103 |
+
if not os.path.exists(file_path):
|
| 104 |
+
return False
|
| 105 |
+
|
| 106 |
+
file_ext = os.path.splitext(file_path)[1].lower()
|
| 107 |
+
return file_ext in self.supported_image_formats or file_ext in self.supported_video_formats
|
| 108 |
+
|
| 109 |
+
def get_file_type(self, file_path: str) -> str:
|
| 110 |
+
"""
|
| 111 |
+
Determine if file is image or video.
|
| 112 |
+
|
| 113 |
+
Args:
|
| 114 |
+
file_path: Path to the file
|
| 115 |
+
|
| 116 |
+
Returns:
|
| 117 |
+
'image', 'video', or 'unknown'
|
| 118 |
+
"""
|
| 119 |
+
file_ext = os.path.splitext(file_path)[1].lower()
|
| 120 |
+
|
| 121 |
+
if file_ext in self.supported_image_formats:
|
| 122 |
+
return 'image'
|
| 123 |
+
elif file_ext in self.supported_video_formats:
|
| 124 |
+
return 'video'
|
| 125 |
+
else:
|
| 126 |
+
return 'unknown'
|
| 127 |
+
|
| 128 |
+
    def process_image(self, image_path: str) -> Dict[str, Any]:
        """
        Process a single image file for gesture detection.

        Loads the image, detects hands, builds annotated/enhanced/comparison
        visualizations, and (when a classifier is attached) classifies each
        detected hand's gesture.

        Args:
            image_path: Path to the image file

        Returns:
            Dictionary containing processing results; on failure a dict with
            'success': False and an 'error' message
        """
        if not os.path.exists(image_path):
            return {'success': False, 'error': 'File not found'}

        try:
            # Load image (cv2.imread returns None instead of raising on bad files)
            image = cv2.imread(image_path)
            if image is None:
                return {'success': False, 'error': 'Could not load image'}

            # Detect hands
            annotated_image, hand_landmarks = self.hand_detector.detect_hands(image)

            print(f"\n=== Hand Detection Debug ===")
            print(f"Processing image: {os.path.basename(image_path)}")
            print(f"Image shape: {image.shape}")
            print(f"Hands detected: {len(hand_landmarks) if hand_landmarks else 0}")
            if hand_landmarks:
                for i, hand in enumerate(hand_landmarks):
                    print(f"Hand {i+1}: {hand['label']}, confidence: {hand['confidence']:.3f}")
            print("=== End Hand Detection Debug ===\n")

            # Create enhanced visualization (fall back to the basic annotated
            # image when no hands were found)
            enhanced_image = self.visualizer.draw_enhanced_landmarks(image, hand_landmarks) if hand_landmarks else annotated_image

            # Create comparison view (original side-by-side with enhanced)
            comparison_image = create_comparison_view(image, enhanced_image)

            # Process gestures: one detection entry per detected hand
            detections = []
            if hand_landmarks:
                for hand_data in hand_landmarks:
                    gesture_description = self.gesture_extractor.create_gesture_description(hand_data)

                    detection = {
                        'hand_label': hand_data['label'],
                        'gesture_description': gesture_description,
                        'confidence': hand_data['confidence'],
                        # Bounding box in pixel coordinates (width, height from image shape)
                        'bounding_box': self.hand_detector.get_bounding_box(
                            hand_data, image.shape[1], image.shape[0]
                        ),
                        'landmarks_3d': hand_data['landmarks']  # Store for 3D visualization
                    }

                    # Classify gesture if classifier available
                    if self.classifier:
                        print(f"\n=== File Handler Debug ===")
                        print(f"Processing hand: {hand_data['label']}")
                        print(f"Gesture description: {gesture_description}")

                        classification = self.classifier.classify_gesture(gesture_description)
                        detection['classification'] = classification

                        print(f"Classification result: {classification}")
                        print("=== End File Handler Debug ===\n")

                    detections.append(detection)

            return {
                'success': True,
                'file_path': image_path,
                'file_type': 'image',
                'image_shape': image.shape,
                'hands_detected': len(hand_landmarks) if hand_landmarks else 0,
                'detections': detections,
                'annotated_image': annotated_image,
                'enhanced_image': enhanced_image,
                'comparison_image': comparison_image,
                'original_image': image
            }

        except Exception as e:
            # Any unexpected failure is reported, not raised, so batch
            # processing of multiple files can continue.
            return {'success': False, 'error': str(e)}
|
| 210 |
+
|
| 211 |
+
    def process_video(self, video_path: str,
                      progress_callback: Optional[callable] = None) -> Dict[str, Any]:
        """
        Process a video file for gesture detection.

        Samples frames (one every frame_skip+1, up to max_frames), detects
        hands on each sampled frame, and — when a classifier is attached —
        runs a sequence-level analysis over the per-frame detections.

        Args:
            video_path: Path to the video file
            progress_callback: Optional callable taking a float progress
                value, invoked after each sampled frame

        Returns:
            Dictionary containing processing results; on failure a dict with
            'success': False and an 'error' message
        """
        if not os.path.exists(video_path):
            return {'success': False, 'error': 'File not found'}

        try:
            # Open video
            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                return {'success': False, 'error': 'Could not open video file'}

            # Get video properties
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            fps = cap.get(cv2.CAP_PROP_FPS)
            # Guard against videos reporting 0 fps metadata
            duration = total_frames / fps if fps > 0 else 0

            # Process frames
            frame_detections = []
            frame_count = 0       # index into the full video
            processed_frames = 0  # number of frames actually analyzed

            while cap.isOpened() and processed_frames < self.max_frames:
                ret, frame = cap.read()
                if not ret:
                    break

                # Skip frames based on frame_skip setting
                if frame_count % (self.frame_skip + 1) != 0:
                    frame_count += 1
                    continue

                # Process frame (timestamp falls back to the frame index
                # when fps is unavailable)
                timestamp = frame_count / fps if fps > 0 else frame_count
                frame_result = self._process_video_frame(frame, timestamp, frame_count)

                # Only keep frames where at least one hand was found
                if frame_result['hands_detected'] > 0:
                    frame_detections.append(frame_result)

                processed_frames += 1
                frame_count += 1

                # Progress callback
                if progress_callback:
                    # Report against whichever limit (max_frames or end of
                    # video) will be reached first
                    progress = min(processed_frames / self.max_frames, frame_count / total_frames)
                    progress_callback(progress)

            cap.release()

            # Analyze sequence if detections found
            sequence_analysis = None
            if frame_detections and self.classifier:
                sequence_analysis = self._analyze_video_sequence(frame_detections)

            return {
                'success': True,
                'file_path': video_path,
                'file_type': 'video',
                'video_properties': {
                    'total_frames': total_frames,
                    'fps': fps,
                    'duration': duration,
                    'processed_frames': processed_frames
                },
                'frame_detections': frame_detections,
                'sequence_analysis': sequence_analysis,
                'total_hands_detected': sum(f['hands_detected'] for f in frame_detections)
            }

        except Exception as e:
            # Report failures instead of raising so batch runs can continue.
            return {'success': False, 'error': str(e)}
|
| 291 |
+
|
| 292 |
+
def _process_video_frame(self, frame: np.ndarray,
|
| 293 |
+
timestamp: float, frame_number: int) -> Dict[str, Any]:
|
| 294 |
+
"""
|
| 295 |
+
Process a single video frame.
|
| 296 |
+
|
| 297 |
+
Args:
|
| 298 |
+
frame: Video frame as numpy array
|
| 299 |
+
timestamp: Timestamp in seconds
|
| 300 |
+
frame_number: Frame number
|
| 301 |
+
|
| 302 |
+
Returns:
|
| 303 |
+
Dictionary containing frame processing results
|
| 304 |
+
"""
|
| 305 |
+
# Detect hands
|
| 306 |
+
annotated_frame, hand_landmarks = self.hand_detector.detect_hands(frame)
|
| 307 |
+
|
| 308 |
+
# Process gestures
|
| 309 |
+
detections = []
|
| 310 |
+
if hand_landmarks:
|
| 311 |
+
for hand_data in hand_landmarks:
|
| 312 |
+
gesture_description = self.gesture_extractor.create_gesture_description(hand_data)
|
| 313 |
+
|
| 314 |
+
detection = {
|
| 315 |
+
'hand_label': hand_data['label'],
|
| 316 |
+
'gesture_description': gesture_description,
|
| 317 |
+
'confidence': hand_data['confidence']
|
| 318 |
+
}
|
| 319 |
+
|
| 320 |
+
# Classify gesture if classifier available
|
| 321 |
+
if self.classifier:
|
| 322 |
+
classification = self.classifier.classify_gesture(gesture_description)
|
| 323 |
+
detection['classification'] = classification
|
| 324 |
+
|
| 325 |
+
detections.append(detection)
|
| 326 |
+
|
| 327 |
+
return {
|
| 328 |
+
'timestamp': timestamp,
|
| 329 |
+
'frame_number': frame_number,
|
| 330 |
+
'hands_detected': len(hand_landmarks) if hand_landmarks else 0,
|
| 331 |
+
'detections': detections
|
| 332 |
+
}
|
| 333 |
+
|
| 334 |
+
def _analyze_video_sequence(self, frame_detections: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Analyze a sequence of per-frame detections as one gesture sequence.

    Args:
        frame_detections: Per-frame detection dicts produced by
            `_process_video_frame` (must contain 'timestamp' and 'detections').

    Returns:
        The classifier's sequence result annotated with timing info, or a
        dict with an 'error' key when analysis is not possible.
    """
    if not self.classifier:
        return {'error': 'Classifier not initialized'}

    try:
        # Keep only descriptions whose per-frame classification succeeded.
        descriptions = [
            det['gesture_description']
            for frame_data in frame_detections
            for det in frame_data['detections']
            if det.get('classification', {}).get('success', False)
        ]

        if len(descriptions) < 2:
            return {'error': 'Not enough gestures for sequence analysis'}

        analysis = self.classifier.classify_sequence(descriptions)

        # Attach timing derived from the first and last frames of the clip.
        first_ts = frame_detections[0]['timestamp']
        last_ts = frame_detections[-1]['timestamp']
        analysis['start_time'] = first_ts
        analysis['end_time'] = last_ts
        analysis['duration'] = last_ts - first_ts
        analysis['gesture_count'] = len(descriptions)

        return analysis

    except Exception as e:
        return {'error': str(e)}
|
| 371 |
+
|
| 372 |
+
def create_thumbnail(self, file_path: str, size: Tuple[int, int] = (150, 150)) -> Optional[np.ndarray]:
    """
    Create a thumbnail for the given image or video file.

    For videos the first frame is used as the thumbnail source.

    Args:
        file_path: Path to the file.
        size: Thumbnail size (width, height).

    Returns:
        Thumbnail image (BGR) or None if the file could not be read.
    """
    try:
        file_type = self.get_file_type(file_path)

        if file_type == 'image':
            image = cv2.imread(file_path)
            if image is not None:
                return cv2.resize(image, size)

        elif file_type == 'video':
            cap = cv2.VideoCapture(file_path)
            try:
                if cap.isOpened():
                    ret, frame = cap.read()
                    if ret:
                        return cv2.resize(frame, size)
            finally:
                # Fix: release the capture unconditionally. Previously an
                # exception raised by read()/resize() skipped release() and
                # leaked the capture handle.
                cap.release()

    except Exception as e:
        print(f"Error creating thumbnail for {file_path}: {e}")

    return None
|
| 406 |
+
|
| 407 |
+
def get_file_metadata(self, file_path: str) -> Dict[str, Any]:
    """
    Get metadata for a file.

    Always returns the basic fields (path, name, size, type, supported);
    image/video dimensions are added when the file can be opened. Read
    errors are reported under an 'error' key instead of raising.

    Args:
        file_path: Path to the file.

    Returns:
        Dictionary containing file metadata.
    """
    metadata = {
        'file_path': file_path,
        'filename': os.path.basename(file_path),
        'file_size': os.path.getsize(file_path) if os.path.exists(file_path) else 0,
        'file_type': self.get_file_type(file_path),
        'supported': self.is_supported_file(file_path)
    }

    try:
        file_type = metadata['file_type']

        if file_type == 'image':
            image = cv2.imread(file_path)
            if image is not None:
                metadata.update({
                    'width': image.shape[1],
                    'height': image.shape[0],
                    'channels': image.shape[2] if len(image.shape) > 2 else 1
                })

        elif file_type == 'video':
            cap = cv2.VideoCapture(file_path)
            try:
                if cap.isOpened():
                    # Query each property once instead of re-reading it.
                    fps = cap.get(cv2.CAP_PROP_FPS)
                    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                    metadata.update({
                        'width': int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                        'height': int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
                        'fps': fps,
                        'frame_count': frame_count,
                        'duration': frame_count / fps if fps > 0 else 0
                    })
            finally:
                # Fix: release the capture unconditionally. Previously an
                # exception during the property reads skipped release() and
                # leaked the capture handle.
                cap.release()

    except Exception as e:
        metadata['error'] = str(e)

    return metadata
|
| 453 |
+
|
| 454 |
+
def batch_process_files(self, file_paths: List[str],
                        progress_callback: Optional[callable] = None,
                        detailed_progress: Optional[callable] = None) -> List[Dict[str, Any]]:
    """
    Process a batch of image/video files with progress tracking.

    Args:
        file_paths: List of file paths to process.
        progress_callback: Optional callback receiving overall progress (0..1).
        detailed_progress: Optional callback receiving (message, index, total).

    Returns:
        One result dict per input file, in order.
    """
    outcomes = []
    n_total = len(file_paths)

    for index, path in enumerate(file_paths):
        if detailed_progress:
            detailed_progress(f"Processing {os.path.basename(path)}...", index, n_total)

        # Reject unsupported formats early; note this also skips the
        # overall-progress update, matching prior behavior.
        if not self.is_supported_file(path):
            outcomes.append({
                'success': False,
                'file_path': path,
                'filename': os.path.basename(path),
                'error': 'Unsupported file format'
            })
            continue

        try:
            kind = self.get_file_type(path)

            if kind == 'image':
                outcome = self.process_image(path)
            elif kind == 'video':
                # Suppress nested per-frame progress during batch runs.
                outcome = self.process_video(path, progress_callback=None)
            else:
                outcome = {
                    'success': False,
                    'file_path': path,
                    'filename': os.path.basename(path),
                    'error': 'Unknown file type'
                }

            # Enrich successful results with file metadata.
            if outcome.get('success'):
                outcome.update(self.get_file_metadata(path))

            outcomes.append(outcome)

        except Exception as exc:
            outcomes.append({
                'success': False,
                'file_path': path,
                'filename': os.path.basename(path),
                'error': str(exc)
            })

        if progress_callback:
            progress_callback((index + 1) / n_total)

    return outcomes
|
| 520 |
+
|
| 521 |
+
def save_annotated_image(self, annotated_image: np.ndarray,
                         output_path: str) -> bool:
    """
    Save annotated image to file.

    Args:
        annotated_image: Annotated image array (BGR).
        output_path: Path to save the image; the extension selects the codec.

    Returns:
        True if the file was actually written, False otherwise.
    """
    try:
        # Fix: cv2.imwrite signals failure (bad extension, unwritable path)
        # via a False return value rather than by raising, so the previous
        # unconditional `return True` reported success for failed writes.
        written = cv2.imwrite(output_path, annotated_image)
        if not written:
            print(f"Error saving annotated image: cv2.imwrite failed for {output_path}")
        return bool(written)
    except Exception as e:
        print(f"Error saving annotated image: {e}")
        return False
|
| 539 |
+
|
| 540 |
+
def cleanup(self):
    """Release resources held by the hand detector, if one is present."""
    detector = self.hand_detector
    if detector:
        detector.cleanup()
|
src/src/gemini_classifier.py
ADDED
|
@@ -0,0 +1,420 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Google Gemini Sign Language Classifier
|
| 3 |
+
|
| 4 |
+
This module provides sign language classification using Google's Gemini AI API.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import google.generativeai as genai
|
| 8 |
+
import os
|
| 9 |
+
from typing import List, Dict, Any, Optional
|
| 10 |
+
import json
|
| 11 |
+
import time
|
| 12 |
+
from dotenv import load_dotenv
|
| 13 |
+
from .fallback_classifier import FallbackSignLanguageClassifier
|
| 14 |
+
|
| 15 |
+
# Load environment variables
|
| 16 |
+
load_dotenv()
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class GeminiSignLanguageClassifier:
|
| 20 |
+
"""
|
| 21 |
+
Sign language classifier using Google Gemini AI.
|
| 22 |
+
"""
|
| 23 |
+
|
| 24 |
+
def __init__(self, api_key: Optional[str] = None, model: str = "gemini-1.5-flash"):
|
| 25 |
+
"""
|
| 26 |
+
Initialize the Gemini classifier.
|
| 27 |
+
|
| 28 |
+
Args:
|
| 29 |
+
api_key: Gemini API key (if None, will use environment variable)
|
| 30 |
+
model: Gemini model to use for classification
|
| 31 |
+
"""
|
| 32 |
+
self.api_key = api_key or os.getenv('GEMINI_API_KEY')
|
| 33 |
+
self.model_name = model
|
| 34 |
+
|
| 35 |
+
if not self.api_key:
|
| 36 |
+
raise ValueError("Gemini API key not provided. Set GEMINI_API_KEY environment variable or pass api_key parameter.")
|
| 37 |
+
|
| 38 |
+
# Configure Gemini
|
| 39 |
+
genai.configure(api_key=self.api_key)
|
| 40 |
+
self.model = genai.GenerativeModel(self.model_name)
|
| 41 |
+
|
| 42 |
+
# Enhanced rate limiting for free tier
|
| 43 |
+
self.last_request_time = 0
|
| 44 |
+
self.min_request_interval = 5.0 # 5 seconds between requests for free tier
|
| 45 |
+
self.request_count = 0
|
| 46 |
+
self.request_window_start = time.time()
|
| 47 |
+
self.max_requests_per_minute = 10 # Conservative limit for free tier
|
| 48 |
+
|
| 49 |
+
# Initialize fallback classifier
|
| 50 |
+
self.fallback_classifier = FallbackSignLanguageClassifier()
|
| 51 |
+
|
| 52 |
+
# Debug mode
|
| 53 |
+
self.debug = True
|
| 54 |
+
|
| 55 |
+
print(f"Gemini classifier initialized with fallback support")
|
| 56 |
+
|
| 57 |
+
def classify_gesture(self, gesture_description: str,
|
| 58 |
+
sign_language: str = "ASL",
|
| 59 |
+
context: Optional[str] = None) -> Dict[str, Any]:
|
| 60 |
+
"""
|
| 61 |
+
Classify a single gesture using Gemini AI.
|
| 62 |
+
|
| 63 |
+
Args:
|
| 64 |
+
gesture_description: Description of the hand gesture
|
| 65 |
+
sign_language: Sign language type (default: ASL)
|
| 66 |
+
context: Additional context (optional)
|
| 67 |
+
|
| 68 |
+
Returns:
|
| 69 |
+
Classification result dictionary
|
| 70 |
+
"""
|
| 71 |
+
self._rate_limit()
|
| 72 |
+
|
| 73 |
+
# Create the prompt
|
| 74 |
+
prompt = self._create_classification_prompt(gesture_description, sign_language, context)
|
| 75 |
+
|
| 76 |
+
if self.debug:
|
| 77 |
+
print(f"\n=== Gemini Classification Debug ===")
|
| 78 |
+
print(f"Input gesture description: {gesture_description}")
|
| 79 |
+
print(f"Prompt sent to Gemini: {prompt[:200]}...")
|
| 80 |
+
|
| 81 |
+
try:
|
| 82 |
+
response = self.model.generate_content(prompt)
|
| 83 |
+
response_content = response.text
|
| 84 |
+
|
| 85 |
+
if self.debug:
|
| 86 |
+
print(f"Gemini response: {response_content}")
|
| 87 |
+
|
| 88 |
+
result = self._parse_response(response_content)
|
| 89 |
+
result['raw_response'] = response_content
|
| 90 |
+
result['success'] = True
|
| 91 |
+
result['method'] = 'gemini_ai'
|
| 92 |
+
|
| 93 |
+
if self.debug:
|
| 94 |
+
print(f"Parsed result: {result}")
|
| 95 |
+
print("=== End Gemini Debug ===\n")
|
| 96 |
+
|
| 97 |
+
return result
|
| 98 |
+
|
| 99 |
+
except Exception as e:
|
| 100 |
+
error_msg = str(e)
|
| 101 |
+
if self.debug:
|
| 102 |
+
print(f"Gemini API Error: {error_msg}")
|
| 103 |
+
print("Falling back to pattern-based classification...")
|
| 104 |
+
|
| 105 |
+
# Use fallback classifier when Gemini API fails
|
| 106 |
+
try:
|
| 107 |
+
fallback_result = self.fallback_classifier.classify_gesture(
|
| 108 |
+
gesture_description, sign_language, context
|
| 109 |
+
)
|
| 110 |
+
fallback_result['fallback_used'] = True
|
| 111 |
+
fallback_result['gemini_error'] = error_msg
|
| 112 |
+
|
| 113 |
+
if self.debug:
|
| 114 |
+
print(f"Fallback result: {fallback_result}")
|
| 115 |
+
print("=== End Gemini Debug ===\n")
|
| 116 |
+
|
| 117 |
+
return fallback_result
|
| 118 |
+
|
| 119 |
+
except Exception as fallback_error:
|
| 120 |
+
if self.debug:
|
| 121 |
+
print(f"Fallback also failed: {str(fallback_error)}")
|
| 122 |
+
print("=== End Gemini Debug ===\n")
|
| 123 |
+
|
| 124 |
+
return {
|
| 125 |
+
'success': False,
|
| 126 |
+
'error': error_msg,
|
| 127 |
+
'fallback_error': str(fallback_error),
|
| 128 |
+
'letter': None,
|
| 129 |
+
'word': None,
|
| 130 |
+
'confidence': 0.0,
|
| 131 |
+
'description': None,
|
| 132 |
+
'method': 'gemini_ai'
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
def classify_sequence(self, gesture_descriptions: List[str],
|
| 136 |
+
sign_language: str = "ASL") -> Dict[str, Any]:
|
| 137 |
+
"""
|
| 138 |
+
Classify a sequence of gestures using Gemini AI.
|
| 139 |
+
|
| 140 |
+
Args:
|
| 141 |
+
gesture_descriptions: List of gesture descriptions
|
| 142 |
+
sign_language: Sign language type
|
| 143 |
+
|
| 144 |
+
Returns:
|
| 145 |
+
Sequence classification result
|
| 146 |
+
"""
|
| 147 |
+
self._rate_limit()
|
| 148 |
+
|
| 149 |
+
# Create sequence prompt
|
| 150 |
+
prompt = self._create_sequence_prompt(gesture_descriptions, sign_language)
|
| 151 |
+
|
| 152 |
+
try:
|
| 153 |
+
response = self.model.generate_content(prompt)
|
| 154 |
+
response_content = response.text
|
| 155 |
+
|
| 156 |
+
result = self._parse_sequence_response(response_content)
|
| 157 |
+
result['raw_response'] = response_content
|
| 158 |
+
result['success'] = True
|
| 159 |
+
result['method'] = 'gemini_ai'
|
| 160 |
+
|
| 161 |
+
return result
|
| 162 |
+
|
| 163 |
+
except Exception as e:
|
| 164 |
+
# Use fallback for sequence classification too
|
| 165 |
+
try:
|
| 166 |
+
fallback_result = self.fallback_classifier.classify_sequence(
|
| 167 |
+
gesture_descriptions, sign_language
|
| 168 |
+
)
|
| 169 |
+
fallback_result['fallback_used'] = True
|
| 170 |
+
fallback_result['gemini_error'] = str(e)
|
| 171 |
+
return fallback_result
|
| 172 |
+
|
| 173 |
+
except Exception as fallback_error:
|
| 174 |
+
return {
|
| 175 |
+
'success': False,
|
| 176 |
+
'error': str(e),
|
| 177 |
+
'fallback_error': str(fallback_error),
|
| 178 |
+
'word': None,
|
| 179 |
+
'sentence': None,
|
| 180 |
+
'confidence': 0.0,
|
| 181 |
+
'method': 'gemini_ai'
|
| 182 |
+
}
|
| 183 |
+
|
| 184 |
+
def _rate_limit(self):
|
| 185 |
+
"""Enhanced rate limiting for Gemini free tier."""
|
| 186 |
+
current_time = time.time()
|
| 187 |
+
|
| 188 |
+
# Reset request count every minute
|
| 189 |
+
if current_time - self.request_window_start >= 60:
|
| 190 |
+
self.request_count = 0
|
| 191 |
+
self.request_window_start = current_time
|
| 192 |
+
|
| 193 |
+
# Check if we've hit the per-minute limit
|
| 194 |
+
if self.request_count >= self.max_requests_per_minute:
|
| 195 |
+
sleep_time = 60 - (current_time - self.request_window_start) + 1
|
| 196 |
+
if self.debug:
|
| 197 |
+
print(f"⏳ Rate limit reached, sleeping for {sleep_time:.1f} seconds...")
|
| 198 |
+
time.sleep(sleep_time)
|
| 199 |
+
self.request_count = 0
|
| 200 |
+
self.request_window_start = time.time()
|
| 201 |
+
|
| 202 |
+
# Ensure minimum interval between requests
|
| 203 |
+
time_since_last_request = current_time - self.last_request_time
|
| 204 |
+
if time_since_last_request < self.min_request_interval:
|
| 205 |
+
sleep_time = self.min_request_interval - time_since_last_request
|
| 206 |
+
if self.debug:
|
| 207 |
+
print(f"⏳ Waiting {sleep_time:.1f} seconds between requests...")
|
| 208 |
+
time.sleep(sleep_time)
|
| 209 |
+
|
| 210 |
+
self.last_request_time = time.time()
|
| 211 |
+
self.request_count += 1
|
| 212 |
+
|
| 213 |
+
def _create_classification_prompt(self, gesture_description: str,
|
| 214 |
+
sign_language: str, context: Optional[str]) -> str:
|
| 215 |
+
"""Create enhanced prompt for single gesture classification."""
|
| 216 |
+
prompt = f"""You are an expert ASL (American Sign Language) interpreter. Analyze this hand gesture and provide ONE CLEAR PREDICTION.
|
| 217 |
+
|
| 218 |
+
GESTURE DATA:
|
| 219 |
+
{gesture_description}
|
| 220 |
+
|
| 221 |
+
COMMON ASL PATTERNS TO RECOGNIZE:
|
| 222 |
+
• Index finger pointing = Number "1"
|
| 223 |
+
• Pinky finger only = Pronoun "I"
|
| 224 |
+
• Thumb up = "GOOD" or "YES"
|
| 225 |
+
• All fingers extended = Number "5" or "HELLO"
|
| 226 |
+
• Closed fist = Letter "A" or "S"
|
| 227 |
+
• Index + middle = Number "2"
|
| 228 |
+
• Three fingers = Number "3"
|
| 229 |
+
• Four fingers = Number "4"
|
| 230 |
+
• Index + pinky = "I LOVE YOU"
|
| 231 |
+
• Thumb + index = Letter "L"
|
| 232 |
+
|
| 233 |
+
TASK: Based on the finger positions described, identify what this gesture most likely represents:
|
| 234 |
+
- A single letter (A-Z)
|
| 235 |
+
- A single number (0-9)
|
| 236 |
+
- A complete word (HELLO, GOOD, I, YOU, LOVE, etc.)
|
| 237 |
+
|
| 238 |
+
Even if not a perfect match, provide your best interpretation based on ASL knowledge.
|
| 239 |
+
|
| 240 |
+
"""
|
| 241 |
+
|
| 242 |
+
if context:
|
| 243 |
+
prompt += f"Context: {context}\n\n"
|
| 244 |
+
|
| 245 |
+
prompt += """Respond in this EXACT JSON format (choose ONE prediction):
|
| 246 |
+
{
|
| 247 |
+
"letter": "1",
|
| 248 |
+
"word": null,
|
| 249 |
+
"confidence": 0.85,
|
| 250 |
+
"description": "Index finger pointing = Number 1"
|
| 251 |
+
}
|
| 252 |
+
|
| 253 |
+
OR for a word:
|
| 254 |
+
{
|
| 255 |
+
"letter": null,
|
| 256 |
+
"word": "GOOD",
|
| 257 |
+
"confidence": 0.85,
|
| 258 |
+
"description": "Thumb up = GOOD"
|
| 259 |
+
}
|
| 260 |
+
|
| 261 |
+
IMPORTANT: Always provide either a letter OR a word, never both null. Make your best guess based on ASL knowledge."""
|
| 262 |
+
|
| 263 |
+
return prompt
|
| 264 |
+
|
| 265 |
+
def _create_sequence_prompt(self, gesture_descriptions: List[str],
|
| 266 |
+
sign_language: str) -> str:
|
| 267 |
+
"""Create prompt for gesture sequence classification."""
|
| 268 |
+
prompt = f"""Analyze this sequence of {sign_language} hand gestures:
|
| 269 |
+
|
| 270 |
+
"""
|
| 271 |
+
|
| 272 |
+
for i, description in enumerate(gesture_descriptions, 1):
|
| 273 |
+
prompt += f"Gesture {i}: {description}\n"
|
| 274 |
+
|
| 275 |
+
prompt += f"""
|
| 276 |
+
What word or sentence do these {sign_language} gestures spell out when combined?
|
| 277 |
+
Consider the sequence and flow of the gestures.
|
| 278 |
+
|
| 279 |
+
Respond in JSON format:
|
| 280 |
+
{{
|
| 281 |
+
"word": "HELLO" or null,
|
| 282 |
+
"sentence": "HELLO WORLD" or null,
|
| 283 |
+
"confidence": 0.85,
|
| 284 |
+
"individual_letters": ["H", "E", "L", "L", "O"]
|
| 285 |
+
}}"""
|
| 286 |
+
|
| 287 |
+
return prompt
|
| 288 |
+
|
| 289 |
+
def _parse_response(self, response_text: str) -> Dict[str, Any]:
|
| 290 |
+
"""Parse Gemini response for single gesture classification."""
|
| 291 |
+
try:
|
| 292 |
+
# Try to parse as JSON first
|
| 293 |
+
if '{' in response_text and '}' in response_text:
|
| 294 |
+
json_start = response_text.find('{')
|
| 295 |
+
json_end = response_text.rfind('}') + 1
|
| 296 |
+
json_str = response_text[json_start:json_end]
|
| 297 |
+
result = json.loads(json_str)
|
| 298 |
+
|
| 299 |
+
# Extract values
|
| 300 |
+
letter = result.get('letter')
|
| 301 |
+
word = result.get('word')
|
| 302 |
+
confidence = float(result.get('confidence', 0.0))
|
| 303 |
+
description = result.get('description', '')
|
| 304 |
+
|
| 305 |
+
# If both are null, try to extract from description
|
| 306 |
+
if not letter and not word:
|
| 307 |
+
if self.debug:
|
| 308 |
+
print("⚠️ Gemini returned null values, trying to extract from description...")
|
| 309 |
+
|
| 310 |
+
# Try to extract prediction from description
|
| 311 |
+
desc_lower = description.lower()
|
| 312 |
+
|
| 313 |
+
# Look for numbers
|
| 314 |
+
for num in ['1', '2', '3', '4', '5', '6', '7', '8', '9', '0']:
|
| 315 |
+
if f"number '{num}'" in desc_lower or f"number {num}" in desc_lower:
|
| 316 |
+
letter = num
|
| 317 |
+
break
|
| 318 |
+
|
| 319 |
+
# Look for letters
|
| 320 |
+
if not letter:
|
| 321 |
+
for char in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ':
|
| 322 |
+
if f"letter '{char.lower()}'" in desc_lower or f"letter {char.lower()}" in desc_lower:
|
| 323 |
+
letter = char
|
| 324 |
+
break
|
| 325 |
+
|
| 326 |
+
# Look for words
|
| 327 |
+
if not letter and not word:
|
| 328 |
+
common_words = ['good', 'hello', 'i', 'you', 'love', 'yes', 'no', 'please', 'thank you']
|
| 329 |
+
for w in common_words:
|
| 330 |
+
if w in desc_lower:
|
| 331 |
+
word = w.upper()
|
| 332 |
+
break
|
| 333 |
+
|
| 334 |
+
return {
|
| 335 |
+
'letter': letter,
|
| 336 |
+
'word': word,
|
| 337 |
+
'confidence': confidence,
|
| 338 |
+
'description': description
|
| 339 |
+
}
|
| 340 |
+
else:
|
| 341 |
+
# Fallback: simple text parsing
|
| 342 |
+
return self._parse_text_response(response_text)
|
| 343 |
+
|
| 344 |
+
except (json.JSONDecodeError, ValueError):
|
| 345 |
+
return self._parse_text_response(response_text)
|
| 346 |
+
|
| 347 |
+
def _parse_sequence_response(self, response_text: str) -> Dict[str, Any]:
|
| 348 |
+
"""Parse Gemini response for sequence classification."""
|
| 349 |
+
try:
|
| 350 |
+
if '{' in response_text and '}' in response_text:
|
| 351 |
+
json_start = response_text.find('{')
|
| 352 |
+
json_end = response_text.rfind('}') + 1
|
| 353 |
+
json_str = response_text[json_start:json_end]
|
| 354 |
+
result = json.loads(json_str)
|
| 355 |
+
|
| 356 |
+
return {
|
| 357 |
+
'word': result.get('word'),
|
| 358 |
+
'sentence': result.get('sentence'),
|
| 359 |
+
'confidence': float(result.get('confidence', 0.0)),
|
| 360 |
+
'individual_letters': result.get('individual_letters', [])
|
| 361 |
+
}
|
| 362 |
+
else:
|
| 363 |
+
return self._parse_sequence_text_response(response_text)
|
| 364 |
+
|
| 365 |
+
except (json.JSONDecodeError, ValueError):
|
| 366 |
+
return self._parse_sequence_text_response(response_text)
|
| 367 |
+
|
| 368 |
+
def _parse_text_response(self, response_text: str) -> Dict[str, Any]:
|
| 369 |
+
"""Enhanced fallback text parsing for single gesture."""
|
| 370 |
+
response_lower = response_text.lower()
|
| 371 |
+
|
| 372 |
+
# Common ASL words to look for
|
| 373 |
+
common_words = ['hello', 'hungry', 'thank you', 'please', 'sorry', 'yes', 'no',
|
| 374 |
+
'i', 'you', 'love', 'help', 'more', 'water', 'eat', 'drink',
|
| 375 |
+
'good', 'bad', 'happy', 'sad', 'stop', 'go', 'come', 'home']
|
| 376 |
+
|
| 377 |
+
# Look for words first (priority)
|
| 378 |
+
word = None
|
| 379 |
+
for w in common_words:
|
| 380 |
+
if w in response_lower:
|
| 381 |
+
word = w.upper()
|
| 382 |
+
break
|
| 383 |
+
|
| 384 |
+
# Look for letter patterns
|
| 385 |
+
letter = None
|
| 386 |
+
if not word: # Only look for letters if no word found
|
| 387 |
+
import re
|
| 388 |
+
# Look for single letters
|
| 389 |
+
letter_match = re.search(r'\b([A-Z])\b', response_text.upper())
|
| 390 |
+
if letter_match:
|
| 391 |
+
letter = letter_match.group(1)
|
| 392 |
+
|
| 393 |
+
# Look for numbers
|
| 394 |
+
number_match = re.search(r'\b([0-9])\b', response_text)
|
| 395 |
+
if number_match:
|
| 396 |
+
letter = number_match.group(1)
|
| 397 |
+
|
| 398 |
+
# Extract confidence if mentioned
|
| 399 |
+
confidence = 0.5 # Default
|
| 400 |
+
conf_match = re.search(r'(\d+(?:\.\d+)?)\s*%', response_text)
|
| 401 |
+
if conf_match:
|
| 402 |
+
confidence = float(conf_match.group(1)) / 100
|
| 403 |
+
|
| 404 |
+
return {
|
| 405 |
+
'letter': letter,
|
| 406 |
+
'word': word,
|
| 407 |
+
'confidence': confidence,
|
| 408 |
+
'description': f"Parsed from text: {response_text[:100]}..."
|
| 409 |
+
}
|
| 410 |
+
|
| 411 |
+
def _parse_sequence_text_response(self, response_text: str) -> Dict[str, Any]:
|
| 412 |
+
"""Fallback text parsing for sequence."""
|
| 413 |
+
# Simple implementation for sequence parsing
|
| 414 |
+
return {
|
| 415 |
+
'word': None,
|
| 416 |
+
'sentence': None,
|
| 417 |
+
'confidence': 0.3,
|
| 418 |
+
'individual_letters': [],
|
| 419 |
+
'description': f"Text parsing fallback: {response_text[:100]}..."
|
| 420 |
+
}
|
src/src/gesture_extractor.py
ADDED
|
@@ -0,0 +1,270 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Gesture Feature Extraction Module
|
| 3 |
+
Processes hand landmark data into simplified format for OpenAI API classification
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
import math
|
| 8 |
+
from typing import List, Dict, Any, Tuple, Optional
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class GestureExtractor:
|
| 12 |
+
"""
|
| 13 |
+
A class for extracting gesture features from hand landmarks.
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
def __init__(self):
|
| 17 |
+
"""Initialize the GestureExtractor."""
|
| 18 |
+
# Define finger tip and base indices for easier processing
|
| 19 |
+
self.finger_tips = [4, 8, 12, 16, 20] # Thumb, Index, Middle, Ring, Pinky tips
|
| 20 |
+
self.finger_bases = [2, 5, 9, 13, 17] # Finger base joints
|
| 21 |
+
self.finger_pips = [3, 6, 10, 14, 18] # PIP joints
|
| 22 |
+
|
| 23 |
+
def normalize_landmarks(self, hand_landmarks: Dict[str, Any]) -> List[Dict[str, float]]:
    """
    Normalize hand landmarks so they are wrist-relative and scale-invariant.

    Coordinates are translated so the wrist (landmark 0) is the origin and
    divided by the wrist→middle-MCP distance, a proxy for hand size.

    Args:
        hand_landmarks: Hand landmark data from MediaPipe ('landmarks' key).

    Returns:
        List of dicts with normalized 'x', 'y', 'z' per landmark.
    """
    points = hand_landmarks['landmarks']

    # Wrist (landmark 0) becomes the origin of the normalized frame.
    origin_x = points[0]['x']
    origin_y = points[0]['y']

    # Hand size: planar distance from wrist to middle-finger MCP (landmark 9).
    mcp = points[9]
    scale = math.sqrt(
        (mcp['x'] - origin_x) ** 2 +
        (mcp['y'] - origin_y) ** 2
    )
    if scale == 0:
        scale = 1.0  # degenerate hand; avoid division by zero

    return [
        {
            'x': (p['x'] - origin_x) / scale,
            'y': (p['y'] - origin_y) / scale,
            'z': p['z'] / scale,
        }
        for p in points
    ]
|
| 61 |
+
|
| 62 |
+
def extract_finger_states(self, normalized_landmarks: List[Dict[str, float]]) -> Dict[str, bool]:
|
| 63 |
+
"""
|
| 64 |
+
Determine which fingers are extended or closed.
|
| 65 |
+
|
| 66 |
+
Args:
|
| 67 |
+
normalized_landmarks: Normalized landmark coordinates
|
| 68 |
+
|
| 69 |
+
Returns:
|
| 70 |
+
Dictionary with finger states (True = extended, False = closed)
|
| 71 |
+
"""
|
| 72 |
+
finger_names = ['thumb', 'index', 'middle', 'ring', 'pinky']
|
| 73 |
+
finger_states = {}
|
| 74 |
+
|
| 75 |
+
for i, finger_name in enumerate(finger_names):
|
| 76 |
+
tip_idx = self.finger_tips[i]
|
| 77 |
+
pip_idx = self.finger_pips[i]
|
| 78 |
+
|
| 79 |
+
# For thumb, use different logic (horizontal movement)
|
| 80 |
+
if finger_name == 'thumb':
|
| 81 |
+
# Compare thumb tip with thumb IP joint
|
| 82 |
+
tip_x = normalized_landmarks[tip_idx]['x']
|
| 83 |
+
ip_x = normalized_landmarks[3]['x'] # Thumb IP joint
|
| 84 |
+
finger_states[finger_name] = abs(tip_x - ip_x) > 0.1
|
| 85 |
+
else:
|
| 86 |
+
# For other fingers, compare tip Y with PIP Y
|
| 87 |
+
tip_y = normalized_landmarks[tip_idx]['y']
|
| 88 |
+
pip_y = normalized_landmarks[pip_idx]['y']
|
| 89 |
+
finger_states[finger_name] = tip_y < pip_y # Extended if tip is above PIP
|
| 90 |
+
|
| 91 |
+
return finger_states
|
| 92 |
+
|
| 93 |
+
def calculate_angles(self, normalized_landmarks: List[Dict[str, float]]) -> Dict[str, float]:
    """
    Calculate angles between key landmarks.

    Args:
        normalized_landmarks: Normalized landmark coordinates

    Returns:
        Dictionary with 'thumb_index_angle' and 'palm_orientation', both
        in degrees (0.0 whenever a vector is degenerate).
    """
    def _angle_between(u, v):
        # Angle in degrees between two 2-D vectors; 0.0 for zero-length input.
        denom = np.linalg.norm(u) * np.linalg.norm(v)
        if denom > 0:
            cos_val = np.clip(np.dot(u, v) / denom, -1.0, 1.0)
            return math.degrees(math.acos(cos_val))
        return 0.0

    wrist = normalized_landmarks[0]
    thumb_tip = normalized_landmarks[4]
    index_tip = normalized_landmarks[8]
    middle_mcp = normalized_landmarks[9]

    # Vectors radiating from the wrist toward the key landmarks.
    thumb_vec = np.array([thumb_tip['x'] - wrist['x'], thumb_tip['y'] - wrist['y']])
    index_vec = np.array([index_tip['x'] - wrist['x'], index_tip['y'] - wrist['y']])
    palm_vec = np.array([middle_mcp['x'] - wrist['x'], middle_mcp['y'] - wrist['y']])

    return {
        # Spread between thumb and index finger.
        'thumb_index_angle': _angle_between(thumb_vec, index_vec),
        # Tilt of the palm axis relative to "straight up" (0, -1).
        'palm_orientation': _angle_between(palm_vec, np.array([0, -1])),
    }
|
| 144 |
+
|
| 145 |
+
def extract_distances(self, normalized_landmarks: List[Dict[str, float]]) -> Dict[str, float]:
    """
    Calculate 2-D distances between key landmarks.

    Args:
        normalized_landmarks: Normalized landmark coordinates

    Returns:
        Dictionary with thumb-index and index-middle tip distances plus a
        wrist-to-tip distance for each finger.
    """
    def _dist(a, b):
        # Euclidean distance in the x/y plane (z is ignored).
        return math.sqrt((a['x'] - b['x']) ** 2 + (a['y'] - b['y']) ** 2)

    wrist = normalized_landmarks[0]
    thumb_tip = normalized_landmarks[4]
    index_tip = normalized_landmarks[8]
    middle_tip = normalized_landmarks[12]

    distances = {
        'thumb_index_distance': _dist(thumb_tip, index_tip),
        'index_middle_distance': _dist(index_tip, middle_tip),
    }

    # Wrist-to-fingertip reach for every finger.
    for pos, finger in enumerate(('thumb', 'index', 'middle', 'ring', 'pinky')):
        tip = normalized_landmarks[self.finger_tips[pos]]
        distances[f'wrist_{finger}_distance'] = _dist(tip, wrist)

    return distances
|
| 183 |
+
|
| 184 |
+
def create_gesture_description(self, hand_landmarks: Dict[str, Any]) -> str:
    """
    Create a textual description of the gesture for OpenAI API.

    Combines the hand label, finger states, key angles/distances and a
    few recognizable shape patterns into one semicolon-separated string.

    Args:
        hand_landmarks: Hand landmark data from MediaPipe

    Returns:
        String description of the gesture
    """
    normalized = self.normalize_landmarks(hand_landmarks)
    states = self.extract_finger_states(normalized)
    angles = self.calculate_angles(normalized)
    distances = self.extract_distances(normalized)

    parts = [f"Hand: {hand_landmarks['label']}"]

    extended = [name for name, up in states.items() if up]
    closed = [name for name, up in states.items() if not up]
    if extended:
        parts.append(f"Extended fingers: {', '.join(extended)}")
    if closed:
        parts.append(f"Closed fingers: {', '.join(closed)}")

    # Key numeric measurements, rounded for prompt readability.
    parts.append(f"Thumb-index angle: {angles['thumb_index_angle']:.1f} degrees")
    parts.append(f"Thumb-index distance: {distances['thumb_index_distance']:.3f}")
    parts.append(f"Palm orientation: {angles['palm_orientation']:.1f} degrees")

    # Tag a handful of easily recognizable gesture shapes.
    if not any(states.values()):
        parts.append("Pattern: Closed fist")
    elif all(states.values()):
        parts.append("Pattern: Open hand")
    elif states['index'] and not any(states[f] for f in ['middle', 'ring', 'pinky']):
        parts.append("Pattern: Pointing gesture")
    elif states['thumb'] and states['index'] and distances['thumb_index_distance'] < 0.1:
        parts.append("Pattern: Pinch gesture")

    return "; ".join(parts)
|
| 230 |
+
|
| 231 |
+
def extract_features_vector(self, hand_landmarks: Dict[str, Any]) -> np.ndarray:
    """
    Extract numerical feature vector for machine learning models.

    Layout (14 features): 5 binary finger states, 2 angles scaled into
    [0, 1], then 7 distance features.

    Args:
        hand_landmarks: Hand landmark data from MediaPipe

    Returns:
        NumPy array of features
    """
    normalized = self.normalize_landmarks(hand_landmarks)
    states = self.extract_finger_states(normalized)
    angles = self.calculate_angles(normalized)
    distances = self.extract_distances(normalized)

    finger_order = ('thumb', 'index', 'middle', 'ring', 'pinky')

    # Binary finger-state features (thumb .. pinky).
    features = [1.0 if states[name] else 0.0 for name in finger_order]

    # Angle features normalized from degrees into [0, 1].
    features.append(angles['thumb_index_angle'] / 180.0)
    features.append(angles['palm_orientation'] / 180.0)

    # Distance features: two tip-to-tip plus five wrist-to-tip values.
    features.append(distances['thumb_index_distance'])
    features.append(distances['index_middle_distance'])
    for name in finger_order:
        features.append(distances[f'wrist_{name}_distance'])

    return np.array(features)
|
src/src/hand_detector.py
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Hand Landmark Detection Module using MediaPipe
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import cv2
|
| 6 |
+
import mediapipe as mp
|
| 7 |
+
import numpy as np
|
| 8 |
+
from typing import List, Tuple, Optional, Dict, Any
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class HandDetector:
    """
    A class for detecting hand landmarks using MediaPipe Hands.

    Wraps ``mp.solutions.hands`` and provides helpers for landmark
    extraction, pixel conversion, bounding boxes and a simple
    closed-hand heuristic.
    """

    def __init__(self,
                 static_image_mode: bool = False,
                 max_num_hands: int = 2,
                 min_detection_confidence: float = 0.5,  # Lowered for better detection
                 min_tracking_confidence: float = 0.3):  # Lowered for better detection
        """
        Initialize the HandDetector.

        Args:
            static_image_mode: Whether to treat input as static images
            max_num_hands: Maximum number of hands to detect
            min_detection_confidence: Minimum confidence for hand detection
            min_tracking_confidence: Minimum confidence for hand tracking
        """
        self.static_image_mode = static_image_mode
        self.max_num_hands = max_num_hands
        self.min_detection_confidence = min_detection_confidence
        self.min_tracking_confidence = min_tracking_confidence

        # Initialize MediaPipe hands pipeline and drawing utilities.
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            static_image_mode=self.static_image_mode,
            max_num_hands=self.max_num_hands,
            min_detection_confidence=self.min_detection_confidence,
            min_tracking_confidence=self.min_tracking_confidence
        )
        self.mp_drawing = mp.solutions.drawing_utils
        self.mp_drawing_styles = mp.solutions.drawing_styles

    def detect_hands(self, image: np.ndarray) -> Tuple[np.ndarray, List[Dict[str, Any]]]:
        """
        Detect hands in the given image.

        Args:
            image: Input image as numpy array (BGR format)

        Returns:
            Tuple of (annotated_image, hand_landmarks_list); each list
            entry is a dict with 'label', 'landmarks' and 'confidence'.
        """
        # MediaPipe expects RGB input; OpenCV delivers BGR.
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = self.hands.process(rgb_image)

        # Draw on a copy so the caller's frame stays untouched.
        annotated_image = image.copy()
        hand_landmarks_list = []

        if results.multi_hand_landmarks:
            for idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
                # Guard against a handedness list shorter than the landmark
                # list: the old code indexed it unconditionally and could
                # raise IndexError on a mismatch.
                has_handedness = (results.multi_handedness is not None
                                  and idx < len(results.multi_handedness))
                hand_label = (results.multi_handedness[idx].classification[0].label
                              if has_handedness else "Unknown")

                # Draw landmarks on the image
                self.mp_drawing.draw_landmarks(
                    annotated_image,
                    hand_landmarks,
                    self.mp_hands.HAND_CONNECTIONS,
                    self.mp_drawing_styles.get_default_hand_landmarks_style(),
                    self.mp_drawing_styles.get_default_hand_connections_style()
                )

                # Store normalized (0..1) landmark coordinates.
                landmarks = [{'x': lm.x, 'y': lm.y, 'z': lm.z}
                             for lm in hand_landmarks.landmark]

                hand_landmarks_list.append({
                    'label': hand_label,
                    'landmarks': landmarks,
                    'confidence': (results.multi_handedness[idx].classification[0].score
                                   if has_handedness else 0.0)
                })

        return annotated_image, hand_landmarks_list

    def get_landmark_positions(self, hand_landmarks: List[Dict[str, Any]],
                               image_width: int, image_height: int) -> List[List[Tuple[int, int]]]:
        """
        Convert normalized landmarks to pixel coordinates.

        Args:
            hand_landmarks: List of hand landmark data
            image_width: Width of the image
            image_height: Height of the image

        Returns:
            One list of (x, y) pixel coordinates per hand.
            (Return annotation corrected: this has always returned a
            list of per-hand lists, not a flat list of tuples.)
        """
        positions = []
        for hand_data in hand_landmarks:
            hand_positions = [(int(lm['x'] * image_width), int(lm['y'] * image_height))
                              for lm in hand_data['landmarks']]
            positions.append(hand_positions)
        return positions

    def get_bounding_box(self, hand_landmarks: Dict[str, Any],
                         image_width: int, image_height: int) -> Tuple[int, int, int, int]:
        """
        Get bounding box for detected hand.

        Args:
            hand_landmarks: Hand landmark data
            image_width: Width of the image
            image_height: Height of the image

        Returns:
            Tuple of (x_min, y_min, x_max, y_max) in pixels
        """
        x_coords = [lm['x'] * image_width for lm in hand_landmarks['landmarks']]
        y_coords = [lm['y'] * image_height for lm in hand_landmarks['landmarks']]
        return (int(min(x_coords)), int(min(y_coords)),
                int(max(x_coords)), int(max(y_coords)))

    def is_hand_closed(self, hand_landmarks: Dict[str, Any]) -> bool:
        """
        Simple heuristic to determine if hand is closed (fist).

        A finger counts as closed when its tip sits below its PIP/IP
        joint in image coordinates; the hand counts as closed when at
        least four fingers are.

        Args:
            hand_landmarks: Hand landmark data

        Returns:
            Boolean indicating if hand appears closed
        """
        landmarks = hand_landmarks['landmarks']

        # Tip / PIP(IP) index pairs for thumb, index, middle, ring, pinky.
        finger_tips = [4, 8, 12, 16, 20]
        finger_pips = [3, 6, 10, 14, 18]

        closed_fingers = sum(
            1 for tip, pip in zip(finger_tips, finger_pips)
            if landmarks[tip]['y'] > landmarks[pip]['y']  # tip below pip
        )

        return closed_fingers >= 4

    def cleanup(self):
        """
        Clean up MediaPipe resources.
        """
        if hasattr(self, 'hands'):
            self.hands.close()
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
# Landmark indices for reference.
# Maps MediaPipe Hands landmark names to their model indices (0-20):
# the wrist first, then four joints per finger from palm to tip
# (CMC/MCP -> PIP/IP -> DIP -> TIP).
HAND_LANDMARKS = {
    'WRIST': 0,
    'THUMB_CMC': 1, 'THUMB_MCP': 2, 'THUMB_IP': 3, 'THUMB_TIP': 4,
    'INDEX_FINGER_MCP': 5, 'INDEX_FINGER_PIP': 6, 'INDEX_FINGER_DIP': 7, 'INDEX_FINGER_TIP': 8,
    'MIDDLE_FINGER_MCP': 9, 'MIDDLE_FINGER_PIP': 10, 'MIDDLE_FINGER_DIP': 11, 'MIDDLE_FINGER_TIP': 12,
    'RING_FINGER_MCP': 13, 'RING_FINGER_PIP': 14, 'RING_FINGER_DIP': 15, 'RING_FINGER_TIP': 16,
    'PINKY_MCP': 17, 'PINKY_PIP': 18, 'PINKY_DIP': 19, 'PINKY_TIP': 20
}
|
src/src/openai_classifier.py
ADDED
|
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
OpenAI API Integration for Sign Language Classification
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from openai import OpenAI
|
| 6 |
+
import os
|
| 7 |
+
from typing import List, Dict, Any, Optional
|
| 8 |
+
import json
|
| 9 |
+
import time
|
| 10 |
+
from dotenv import load_dotenv
|
| 11 |
+
from .fallback_classifier import FallbackSignLanguageClassifier
|
| 12 |
+
|
| 13 |
+
# Load environment variables
|
| 14 |
+
load_dotenv()
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class SignLanguageClassifier:
|
| 18 |
+
"""
|
| 19 |
+
A class for classifying sign language gestures using OpenAI API.
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
def __init__(self, api_key: Optional[str] = None, model: str = "gpt-3.5-turbo"):
    """
    Initialize the SignLanguageClassifier.

    Args:
        api_key: OpenAI API key (if None, will use environment variable)
        model: OpenAI model to use for classification

    Raises:
        ValueError: If no key is supplied and OPENAI_API_KEY is unset.
    """
    self.api_key = api_key if api_key else os.getenv('OPENAI_API_KEY')
    self.model = model

    if not self.api_key:
        raise ValueError("OpenAI API key not provided. Set OPENAI_API_KEY environment variable or pass api_key parameter.")

    # Client built with the modern openai>=1.0 interface.
    self.client = OpenAI(api_key=self.api_key)

    # Client-side rate limiting state.
    self.last_request_time = 0
    self.min_request_interval = 1.0  # Minimum seconds between requests

    # Verbose logging of prompts and responses.
    self.debug = True

    # Pattern-based classifier used when the OpenAI call fails.
    self.fallback_classifier = FallbackSignLanguageClassifier()

    print("OpenAI classifier initialized with fallback support")
|
| 50 |
+
|
| 51 |
+
def _rate_limit(self):
    """Sleep just long enough to keep requests at least
    ``min_request_interval`` seconds apart, then record the send time."""
    elapsed = time.time() - self.last_request_time
    if elapsed < self.min_request_interval:
        time.sleep(self.min_request_interval - elapsed)
    self.last_request_time = time.time()
|
| 60 |
+
|
| 61 |
+
def classify_gesture(self, gesture_description: str,
                     sign_language: str = "ASL",
                     context: Optional[str] = None) -> Dict[str, Any]:
    """
    Classify a gesture using OpenAI API.

    On any OpenAI failure this falls back to the pattern-based
    classifier; only if the fallback also fails is an error dict
    (success=False) returned.

    Args:
        gesture_description: Textual description of the gesture
        sign_language: Type of sign language (ASL, ISL, etc.)
        context: Additional context for classification

    Returns:
        Dictionary containing classification results. On success it has
        'letter', 'word', 'confidence', 'description', 'raw_response'
        and 'success'=True; fallback results additionally carry
        'fallback_used' and 'openai_error'.
    """
    # Throttle so consecutive calls respect min_request_interval.
    self._rate_limit()

    # Create the prompt
    prompt = self._create_classification_prompt(gesture_description, sign_language, context)

    if self.debug:
        print(f"\n=== OpenAI Classification Debug ===")
        print(f"Input gesture description: {gesture_description}")
        print(f"Prompt sent to OpenAI: {prompt}")

    try:
        # Low temperature keeps predictions decisive/repeatable.
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": self._get_system_prompt(sign_language)},
                {"role": "user", "content": prompt}
            ],
            max_tokens=200,
            temperature=0.3,
            top_p=0.9
        )

        response_content = response.choices[0].message.content

        if self.debug:
            print(f"OpenAI response: {response_content}")

        # Parse the model output (JSON preferred, text as fallback).
        result = self._parse_response(response_content)
        result['raw_response'] = response_content
        result['success'] = True

        if self.debug:
            print(f"Parsed result: {result}")
            print("=== End Debug ===\n")

        return result

    except Exception as e:
        error_msg = str(e)
        if self.debug:
            print(f"OpenAI API Error: {error_msg}")
            print("Falling back to pattern-based classification...")

        # Use fallback classifier when OpenAI API fails
        try:
            fallback_result = self.fallback_classifier.classify_gesture(
                gesture_description, sign_language, context
            )
            # Mark the result so callers can tell which path produced it.
            fallback_result['fallback_used'] = True
            fallback_result['openai_error'] = error_msg

            if self.debug:
                print(f"Fallback result: {fallback_result}")
                print("=== End Debug ===\n")

            return fallback_result

        except Exception as fallback_error:
            if self.debug:
                print(f"Fallback also failed: {str(fallback_error)}")
                print("=== End Debug ===\n")

            # Both paths failed: return an explicit error payload rather
            # than raising, so the caller's loop keeps running.
            return {
                'success': False,
                'error': error_msg,
                'fallback_error': str(fallback_error),
                'letter': None,
                'word': None,
                'confidence': 0.0,
                'description': None
            }
|
| 146 |
+
|
| 147 |
+
def classify_sequence(self, gesture_descriptions: List[str],
                      sign_language: str = "ASL") -> Dict[str, Any]:
    """
    Classify a sequence of gestures to form words or sentences.

    Mirrors classify_gesture: OpenAI first, pattern-based fallback on
    failure, error dict (success=False) only if both fail.

    Args:
        gesture_descriptions: List of gesture descriptions
        sign_language: Type of sign language

    Returns:
        Dictionary containing sequence classification results
        ('word', 'sentence', 'confidence', 'individual_letters', plus
        'raw_response'/'success' on the OpenAI path).
    """
    # Throttle so consecutive calls respect min_request_interval.
    self._rate_limit()

    # Create sequence prompt
    prompt = self._create_sequence_prompt(gesture_descriptions, sign_language)

    try:
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": self._get_sequence_system_prompt(sign_language)},
                {"role": "user", "content": prompt}
            ],
            max_tokens=300,
            temperature=0.3,
            top_p=0.9
        )

        result = self._parse_sequence_response(response.choices[0].message.content)
        result['raw_response'] = response.choices[0].message.content
        result['success'] = True

        return result

    except Exception as e:
        # Use fallback for sequence classification too
        try:
            fallback_result = self.fallback_classifier.classify_sequence(
                gesture_descriptions, sign_language
            )
            # Mark the result so callers can tell which path produced it.
            fallback_result['fallback_used'] = True
            fallback_result['openai_error'] = str(e)
            return fallback_result

        except Exception as fallback_error:
            # Both paths failed: return an explicit error payload.
            return {
                'success': False,
                'error': str(e),
                'fallback_error': str(fallback_error),
                'word': None,
                'sentence': None,
                'confidence': 0.0
            }
|
| 201 |
+
|
| 202 |
+
def _get_system_prompt(self, sign_language: str) -> str:
    """Get system prompt for gesture classification.

    Fix: the prompt previously hard-coded "(American Sign Language)"
    after the language name even when ``sign_language`` was not ASL;
    it now names only the requested language.
    """
    return f"""You are an expert in {sign_language} recognition.
Your task is to provide ONE CLEAR PREDICTION for each hand gesture.

PRIORITY ORDER:
1. If it's a complete word sign (like "HELLO", "HUNGRY", "THANK YOU"), identify the WORD
2. If it's a letter/number sign, identify the LETTER or NUMBER
3. If uncertain, provide your best single guess

Respond in JSON format:
{{
    "letter": "A" or null,
    "word": "HUNGRY" or null,
    "confidence": 0.85,
    "description": "Brief explanation"
}}

IMPORTANT RULES:
- Provide either a letter OR a word, not both
- Words take priority over letters
- Be decisive - give your best single prediction
- Common words: HELLO, HUNGRY, THANK YOU, PLEASE, SORRY, YES, NO, I, YOU, LOVE, etc.
- Letters: A-Z, Numbers: 0-9
- Confidence should reflect your certainty (0.1 = very uncertain, 0.9 = very certain)

Focus on the most likely single interpretation of the gesture."""
|
| 229 |
+
|
| 230 |
+
def _get_sequence_system_prompt(self, sign_language: str) -> str:
    """Get system prompt for sequence classification."""
    # Template with doubled braces so .format leaves the JSON braces literal.
    template = """You are an expert in {lang} recognition specializing in interpreting sequences of gestures.
Your task is to analyze a sequence of hand gestures and determine if they form a word or sentence.

Respond in JSON format:
{{
    "word": "HELLO" or null,
    "sentence": "HELLO WORLD" or null,
    "confidence": 0.85,
    "individual_letters": ["H", "E", "L", "L", "O"]
}}

Consider:
- Sequential letter spelling
- Common {lang} words and phrases
- Context and flow between gestures"""
    return template.format(lang=sign_language)
|
| 247 |
+
|
| 248 |
+
def _create_classification_prompt(self, gesture_description: str,
                                  sign_language: str, context: Optional[str]) -> str:
    """Create enhanced prompt for single gesture classification.

    Fix: the opener previously hard-coded "ASL (American Sign Language)"
    and ignored the ``sign_language`` parameter entirely; it now names
    the requested language. (The pattern-hint section remains
    ASL-oriented by design.)
    """
    prompt = f"""You are an expert {sign_language} interpreter. Analyze this hand gesture and provide ONE CLEAR PREDICTION.

GESTURE DATA:
{gesture_description}

TASK: Identify what this gesture represents. Respond with EXACTLY ONE of these:
- A single letter (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z)
- A single number (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
- A complete word (HELLO, HUNGRY, THANK YOU, PLEASE, SORRY, YES, NO, I, YOU, LOVE, HELP, MORE, WATER, EAT, DRINK, etc.)

PRIORITY: If this could be a word sign, choose the WORD. If it's clearly a letter/number, choose that.

COMMON ASL PATTERNS:
- Closed fist = A, S, or numbers
- Open hand = 5, HELLO, or STOP
- Pointing = 1, I, or YOU
- Pinch gesture = F, 9, or SMALL

"""

    # Optional conversational context helps disambiguate similar signs.
    if context:
        prompt += f"Context: {context}\n\n"

    prompt += """Respond in this EXACT JSON format:
{
    "letter": "A" or null,
    "word": "HELLO" or null,
    "confidence": 0.85,
    "description": "Brief explanation"
}

Be decisive and confident in your single prediction."""

    return prompt
|
| 285 |
+
|
| 286 |
+
def _create_sequence_prompt(self, gesture_descriptions: List[str],
                            sign_language: str) -> str:
    """Create prompt for gesture sequence classification."""
    header = f"Analyze this sequence of {sign_language} hand gestures:\n\n"
    # One numbered line per gesture, in order of appearance.
    body = "".join(
        f"Gesture {num}: {desc}\n"
        for num, desc in enumerate(gesture_descriptions, 1)
    )
    footer = (
        f"\nWhat word or sentence do these {sign_language} gestures spell out when combined?\n"
        "Consider the sequence and flow of the gestures."
    )
    return header + body + footer
|
| 301 |
+
|
| 302 |
+
def _parse_response(self, response_text: str) -> Dict[str, Any]:
    """Parse OpenAI response for single gesture classification."""
    try:
        start = response_text.find('{')
        end = response_text.rfind('}')
        # Prefer strict JSON whenever the response contains braces.
        if start != -1 and end != -1:
            payload = json.loads(response_text[start:end + 1])
            # Normalize into the fixed result schema.
            return {
                'letter': payload.get('letter'),
                'word': payload.get('word'),
                'confidence': float(payload.get('confidence', 0.0)),
                'description': payload.get('description', ''),
            }
        return self._parse_text_response(response_text)
    except (json.JSONDecodeError, ValueError):
        # Malformed JSON or a non-numeric confidence: fall back to text parsing.
        return self._parse_text_response(response_text)
|
| 325 |
+
|
| 326 |
+
def _parse_sequence_response(self, response_text: str) -> Dict[str, Any]:
|
| 327 |
+
"""Parse OpenAI response for sequence classification."""
|
| 328 |
+
try:
|
| 329 |
+
if '{' in response_text and '}' in response_text:
|
| 330 |
+
json_start = response_text.find('{')
|
| 331 |
+
json_end = response_text.rfind('}') + 1
|
| 332 |
+
json_str = response_text[json_start:json_end]
|
| 333 |
+
result = json.loads(json_str)
|
| 334 |
+
|
| 335 |
+
return {
|
| 336 |
+
'word': result.get('word'),
|
| 337 |
+
'sentence': result.get('sentence'),
|
| 338 |
+
'confidence': float(result.get('confidence', 0.0)),
|
| 339 |
+
'individual_letters': result.get('individual_letters', [])
|
| 340 |
+
}
|
| 341 |
+
else:
|
| 342 |
+
return self._parse_sequence_text_response(response_text)
|
| 343 |
+
|
| 344 |
+
except (json.JSONDecodeError, ValueError):
|
| 345 |
+
return self._parse_sequence_text_response(response_text)
|
| 346 |
+
|
| 347 |
+
def _parse_text_response(self, response_text: str) -> Dict[str, Any]:
|
| 348 |
+
"""Enhanced fallback text parsing for single gesture."""
|
| 349 |
+
response_lower = response_text.lower()
|
| 350 |
+
|
| 351 |
+
# Common ASL words to look for
|
| 352 |
+
common_words = ['hello', 'hungry', 'thank you', 'please', 'sorry', 'yes', 'no',
|
| 353 |
+
'i', 'you', 'love', 'help', 'more', 'water', 'eat', 'drink',
|
| 354 |
+
'good', 'bad', 'happy', 'sad', 'stop', 'go', 'come', 'home']
|
| 355 |
+
|
| 356 |
+
# Look for words first (priority)
|
| 357 |
+
word = None
|
| 358 |
+
for w in common_words:
|
| 359 |
+
if w in response_lower:
|
| 360 |
+
word = w.upper()
|
| 361 |
+
break
|
| 362 |
+
|
| 363 |
+
# Look for letter patterns
|
| 364 |
+
letter = None
|
| 365 |
+
if not word: # Only look for letters if no word found
|
| 366 |
+
import re
|
| 367 |
+
letter_match = re.search(r'letter\s*[:\-]?\s*([a-z])', response_lower)
|
| 368 |
+
if letter_match:
|
| 369 |
+
letter = letter_match.group(1).upper()
|
| 370 |
+
|
| 371 |
+
# Look for word patterns
|
| 372 |
+
word = None
|
| 373 |
+
if 'word' in response_lower:
|
| 374 |
+
word_match = re.search(r'word\s*[:\-]?\s*([a-z]+)', response_lower)
|
| 375 |
+
if word_match:
|
| 376 |
+
word = word_match.group(1).upper()
|
| 377 |
+
|
| 378 |
+
return {
|
| 379 |
+
'letter': letter,
|
| 380 |
+
'word': word,
|
| 381 |
+
'confidence': 0.5, # Default confidence for text parsing
|
| 382 |
+
'description': response_text[:100] # First 100 chars
|
| 383 |
+
}
|
| 384 |
+
|
| 385 |
+
def _parse_sequence_text_response(self, response_text: str) -> Dict[str, Any]:
|
| 386 |
+
"""Fallback text parsing for sequence."""
|
| 387 |
+
return {
|
| 388 |
+
'word': None,
|
| 389 |
+
'sentence': None,
|
| 390 |
+
'confidence': 0.5,
|
| 391 |
+
'individual_letters': []
|
| 392 |
+
}
|
src/src/output_handler.py
ADDED
|
@@ -0,0 +1,391 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Output Display and Speech Synthesis Module
|
| 3 |
+
Handles text display and text-to-speech functionality
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import pyttsx3
|
| 7 |
+
import threading
|
| 8 |
+
import time
|
| 9 |
+
import os
|
| 10 |
+
from typing import List, Dict, Any, Optional, Callable
|
| 11 |
+
from queue import Queue, Empty
|
| 12 |
+
import json
|
| 13 |
+
from datetime import datetime
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class OutputHandler:
    """
    Handles text display and speech synthesis for sign language detection results.

    Maintains an in-memory transcript of detections/sequences, fans results out
    to registered display callbacks, and speaks results through a background
    pyttsx3 worker thread fed by a queue.
    """

    def __init__(self,
                 enable_speech: bool = True,
                 speech_rate: int = 150,
                 speech_volume: float = 0.9,
                 save_transcript: bool = True,
                 transcript_file: str = "sign_language_transcript.txt"):
        """
        Initialize the OutputHandler.

        Args:
            enable_speech: Whether to enable text-to-speech
            speech_rate: Speech rate (words per minute)
            speech_volume: Speech volume (0.0 to 1.0)
            save_transcript: Whether to save transcript to file
            transcript_file: Path to transcript file (a sibling .json is also written)
        """
        self.enable_speech = enable_speech
        self.speech_rate = speech_rate
        self.speech_volume = speech_volume
        self.save_transcript = save_transcript
        self.transcript_file = transcript_file

        # Initialize TTS engine state; the worker thread consumes speech_queue.
        self.tts_engine = None
        self.tts_thread = None
        self.speech_queue = Queue()
        # Flag read by speak() and set_speech_enabled(); written only by the
        # worker thread (no lock — single producer/consumer pattern).
        self.is_speaking = False

        # Transcript storage
        self.transcript = []
        self.current_session_start = datetime.now()

        # Display callbacks invoked on every detection/sequence result.
        self.display_callbacks = []

        # Initialize TTS if enabled
        if self.enable_speech:
            self._initialize_tts()

    def _initialize_tts(self) -> bool:
        """
        Initialize the text-to-speech engine and start the worker thread.

        On failure, disables speech for the rest of the session.

        Returns:
            True if initialized successfully, False otherwise
        """
        try:
            self.tts_engine = pyttsx3.init()

            # Set properties
            self.tts_engine.setProperty('rate', self.speech_rate)
            self.tts_engine.setProperty('volume', self.speech_volume)

            # Get available voices
            voices = self.tts_engine.getProperty('voices')
            if voices:
                # Try to use a female voice if available
                for voice in voices:
                    if 'female' in voice.name.lower() or 'woman' in voice.name.lower():
                        self.tts_engine.setProperty('voice', voice.id)
                        break
                else:
                    # Use first available voice (for/else: loop found no match)
                    self.tts_engine.setProperty('voice', voices[0].id)

            # Start TTS thread (daemon so it never blocks interpreter exit)
            self.tts_thread = threading.Thread(target=self._tts_worker, daemon=True)
            self.tts_thread.start()

            print("Text-to-speech initialized successfully")
            return True

        except Exception as e:
            print(f"Error initializing TTS: {e}")
            self.enable_speech = False
            return False

    def _tts_worker(self):
        """TTS worker thread that processes speech queue until a None sentinel."""
        while True:
            try:
                # 1s timeout keeps the loop responsive to the shutdown sentinel.
                text = self.speech_queue.get(timeout=1.0)
                if text is None:  # Shutdown signal
                    break

                self.is_speaking = True
                self.tts_engine.say(text)
                self.tts_engine.runAndWait()  # blocks until utterance finishes
                self.is_speaking = False

            except Empty:
                continue
            except Exception as e:
                print(f"Error in TTS worker: {e}")
                self.is_speaking = False

    def add_display_callback(self, callback: Callable):
        """
        Add a callback function for display updates.

        Args:
            callback: Called as callback(display_text, result_dict) for every
                detection or sequence result
        """
        self.display_callbacks.append(callback)

    def display_detection(self, detection: Dict[str, Any], speak: bool = True):
        """
        Display and optionally speak a gesture detection result.

        Silently ignores detections whose classification did not succeed.

        Args:
            detection: Detection result dictionary
            speak: Whether to speak the result
        """
        # Extract relevant information
        # NOTE(review): hand_label is unused here; _format_detection_text
        # re-reads it from the detection dict itself.
        hand_label = detection.get('hand_label', 'Unknown')
        classification = detection.get('classification', {})

        if not classification.get('success', False):
            return

        # Format display text
        display_text = self._format_detection_text(detection)

        # Add to transcript
        if self.save_transcript:
            self._add_to_transcript(detection, display_text)

        # Call display callbacks (isolated so one bad callback can't break others)
        for callback in self.display_callbacks:
            try:
                callback(display_text, detection)
            except Exception as e:
                print(f"Error in display callback: {e}")

        # Speak if enabled and requested
        if speak and self.enable_speech:
            speech_text = self._format_speech_text(detection)
            self.speak(speech_text)

        # Print to console
        print(f"[{datetime.now().strftime('%H:%M:%S')}] {display_text}")

    def display_sequence(self, sequence_result: Dict[str, Any], speak: bool = True):
        """
        Display and optionally speak a gesture sequence result.

        Args:
            sequence_result: Sequence classification result
            speak: Whether to speak the result
        """
        if not sequence_result.get('success', False):
            return

        # Format display text
        display_text = self._format_sequence_text(sequence_result)

        # Add to transcript (sequences are flushed to disk immediately)
        if self.save_transcript:
            self._add_sequence_to_transcript(sequence_result, display_text)

        # Call display callbacks
        for callback in self.display_callbacks:
            try:
                callback(display_text, sequence_result)
            except Exception as e:
                print(f"Error in display callback: {e}")

        # Speak if enabled and requested
        if speak and self.enable_speech:
            speech_text = self._format_sequence_speech_text(sequence_result)
            self.speak(speech_text)

        # Print to console
        print(f"[{datetime.now().strftime('%H:%M:%S')}] SEQUENCE: {display_text}")

    def speak(self, text: str):
        """
        Add text to speech queue.

        NOTE(review): text is silently DROPPED while an utterance is playing
        (is_speaking is True) — results arriving mid-utterance are never
        spoken. Confirm this backlog-avoidance behavior is intentional.

        Args:
            text: Text to speak
        """
        if self.enable_speech and not self.is_speaking:
            self.speech_queue.put(text)

    def _format_detection_text(self, detection: Dict[str, Any]) -> str:
        """Format a single detection result for on-screen display."""
        classification = detection.get('classification', {})
        hand_label = detection.get('hand_label', 'Unknown')

        parts = [f"{hand_label} hand:"]

        if classification.get('letter'):
            parts.append(f"Letter '{classification['letter']}'")

        if classification.get('word'):
            parts.append(f"Word '{classification['word']}'")

        confidence = classification.get('confidence', 0.0)
        if confidence > 0:
            parts.append(f"({confidence:.1%} confidence)")

        return " ".join(parts)

    def _format_sequence_text(self, sequence_result: Dict[str, Any]) -> str:
        """Format a sequence result for on-screen display ('|'-separated parts)."""
        parts = []

        if sequence_result.get('word'):
            parts.append(f"Word: '{sequence_result['word']}'")

        if sequence_result.get('sentence'):
            parts.append(f"Sentence: '{sequence_result['sentence']}'")

        if sequence_result.get('individual_letters'):
            letters = " ".join(sequence_result['individual_letters'])
            parts.append(f"Letters: {letters}")

        confidence = sequence_result.get('confidence', 0.0)
        if confidence > 0:
            parts.append(f"({confidence:.1%} confidence)")

        return " | ".join(parts)

    def _format_speech_text(self, detection: Dict[str, Any]) -> str:
        """Format a detection for speech: word preferred over letter."""
        classification = detection.get('classification', {})

        if classification.get('word'):
            return classification['word']
        elif classification.get('letter'):
            return f"Letter {classification['letter']}"
        else:
            return "Gesture detected"

    def _format_sequence_speech_text(self, sequence_result: Dict[str, Any]) -> str:
        """Format a sequence for speech: sentence preferred over word."""
        if sequence_result.get('sentence'):
            return sequence_result['sentence']
        elif sequence_result.get('word'):
            return sequence_result['word']
        else:
            return "Sequence detected"

    def _add_to_transcript(self, detection: Dict[str, Any], display_text: str):
        """Append a detection to the transcript; flush to disk every 10 entries."""
        transcript_entry = {
            'timestamp': datetime.now().isoformat(),
            'type': 'detection',
            'display_text': display_text,
            'detection': detection
        }
        self.transcript.append(transcript_entry)

        # Save to file periodically (batched to limit disk writes)
        if len(self.transcript) % 10 == 0:
            self._save_transcript()

    def _add_sequence_to_transcript(self, sequence_result: Dict[str, Any], display_text: str):
        """Append a sequence to the transcript and flush immediately."""
        transcript_entry = {
            'timestamp': datetime.now().isoformat(),
            'type': 'sequence',
            'display_text': display_text,
            'sequence_result': sequence_result
        }
        self.transcript.append(transcript_entry)
        self._save_transcript()

    def _save_transcript(self):
        """Save the transcript to disk as JSON plus a human-readable text file."""
        if not self.save_transcript:
            return

        try:
            # Create transcript data
            transcript_data = {
                'session_start': self.current_session_start.isoformat(),
                'last_updated': datetime.now().isoformat(),
                'entries': self.transcript
            }

            # Save as JSON (same basename as transcript_file, .json extension).
            # NOTE(review): assumes every value in the stored detection dicts
            # is JSON-serializable — confirm against producers.
            json_file = os.path.splitext(self.transcript_file)[0] + '.json'
            with open(json_file, 'w') as f:
                json.dump(transcript_data, f, indent=2)

            # Save as readable text (full rewrite each time, not an append)
            with open(self.transcript_file, 'w') as f:
                f.write(f"Sign Language Detection Transcript\n")
                f.write(f"Session started: {self.current_session_start.strftime('%Y-%m-%d %H:%M:%S')}\n")
                f.write(f"Last updated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
                f.write("=" * 50 + "\n\n")

                for entry in self.transcript:
                    timestamp = datetime.fromisoformat(entry['timestamp'])
                    f.write(f"[{timestamp.strftime('%H:%M:%S')}] {entry['display_text']}\n")

        except Exception as e:
            print(f"Error saving transcript: {e}")

    def get_transcript_summary(self) -> Dict[str, Any]:
        """
        Get summary of current transcript.

        Returns:
            Dict with total_entries, detections, sequences, deduplicated
            detected_words/detected_letters, and session_duration in seconds
        """
        if not self.transcript:
            return {'total_entries': 0, 'detections': 0, 'sequences': 0}

        detections = sum(1 for entry in self.transcript if entry['type'] == 'detection')
        sequences = sum(1 for entry in self.transcript if entry['type'] == 'sequence')

        # Extract detected words and letters
        detected_words = []
        detected_letters = []

        for entry in self.transcript:
            if entry['type'] == 'detection':
                classification = entry.get('detection', {}).get('classification', {})
                if classification.get('word'):
                    detected_words.append(classification['word'])
                if classification.get('letter'):
                    detected_letters.append(classification['letter'])
            elif entry['type'] == 'sequence':
                sequence_result = entry.get('sequence_result', {})
                if sequence_result.get('word'):
                    detected_words.append(sequence_result['word'])
                if sequence_result.get('sentence'):
                    # Sentences contribute their individual words
                    detected_words.extend(sequence_result['sentence'].split())

        return {
            'total_entries': len(self.transcript),
            'detections': detections,
            'sequences': sequences,
            'detected_words': list(set(detected_words)),      # deduplicated, unordered
            'detected_letters': list(set(detected_letters)),  # deduplicated, unordered
            'session_duration': (datetime.now() - self.current_session_start).total_seconds()
        }

    def clear_transcript(self):
        """Clear the current transcript and restart the session clock."""
        self.transcript = []
        self.current_session_start = datetime.now()
        print("Transcript cleared")

    def set_speech_enabled(self, enabled: bool):
        """Enable or disable speech synthesis (stops any in-flight utterance)."""
        self.enable_speech = enabled
        if not enabled and self.is_speaking:
            # Stop current speech
            if self.tts_engine:
                self.tts_engine.stop()

    def cleanup(self):
        """Clean up resources: flush transcript, stop the TTS worker and engine."""
        # Save final transcript
        if self.save_transcript and self.transcript:
            self._save_transcript()

        # Stop TTS
        if self.tts_thread:
            self.speech_queue.put(None)  # Shutdown signal
            self.tts_thread.join(timeout=2.0)

        if self.tts_engine:
            try:
                self.tts_engine.stop()
            except:  # NOTE(review): bare except hides real errors; best-effort shutdown
                pass
|
src/src/prediction_logger.py
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Comprehensive Prediction Logging System
|
| 3 |
+
|
| 4 |
+
This module provides detailed logging for the sign language prediction pipeline
|
| 5 |
+
to help identify where predictions are failing and track performance.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import json
|
| 9 |
+
import time
|
| 10 |
+
from datetime import datetime
|
| 11 |
+
from typing import Dict, Any, List, Optional
|
| 12 |
+
import os
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class PredictionLogger:
|
| 16 |
+
"""
|
| 17 |
+
Comprehensive logging system for sign language predictions.
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
def __init__(self, log_file: str = "prediction_logs.json", debug: bool = True):
|
| 21 |
+
"""
|
| 22 |
+
Initialize the prediction logger.
|
| 23 |
+
|
| 24 |
+
Args:
|
| 25 |
+
log_file: Path to the log file
|
| 26 |
+
debug: Whether to print debug information
|
| 27 |
+
"""
|
| 28 |
+
self.log_file = log_file
|
| 29 |
+
self.debug = debug
|
| 30 |
+
self.session_id = f"session_{int(time.time())}"
|
| 31 |
+
self.logs = []
|
| 32 |
+
|
| 33 |
+
if self.debug:
|
| 34 |
+
print(f"🔍 Prediction Logger initialized - Session: {self.session_id}")
|
| 35 |
+
|
| 36 |
+
def log_hand_detection(self, image_info: Dict[str, Any], hands_detected: int,
|
| 37 |
+
detection_confidence: List[float] = None) -> str:
|
| 38 |
+
"""
|
| 39 |
+
Log hand detection results.
|
| 40 |
+
|
| 41 |
+
Args:
|
| 42 |
+
image_info: Information about the processed image
|
| 43 |
+
hands_detected: Number of hands detected
|
| 44 |
+
detection_confidence: List of confidence scores for detected hands
|
| 45 |
+
|
| 46 |
+
Returns:
|
| 47 |
+
Log entry ID
|
| 48 |
+
"""
|
| 49 |
+
log_entry = {
|
| 50 |
+
"id": f"hand_det_{int(time.time() * 1000)}",
|
| 51 |
+
"timestamp": datetime.now().isoformat(),
|
| 52 |
+
"session_id": self.session_id,
|
| 53 |
+
"stage": "hand_detection",
|
| 54 |
+
"image_info": image_info,
|
| 55 |
+
"hands_detected": hands_detected,
|
| 56 |
+
"detection_confidence": detection_confidence or [],
|
| 57 |
+
"success": hands_detected > 0
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
self.logs.append(log_entry)
|
| 61 |
+
|
| 62 |
+
if self.debug:
|
| 63 |
+
status = "✅" if hands_detected > 0 else "❌"
|
| 64 |
+
print(f"{status} Hand Detection: {hands_detected} hands detected")
|
| 65 |
+
if detection_confidence:
|
| 66 |
+
for i, conf in enumerate(detection_confidence):
|
| 67 |
+
print(f" Hand {i+1}: {conf:.1%} confidence")
|
| 68 |
+
|
| 69 |
+
return log_entry["id"]
|
| 70 |
+
|
| 71 |
+
def log_gesture_extraction(self, hand_data: Dict[str, Any],
|
| 72 |
+
gesture_description: str) -> str:
|
| 73 |
+
"""
|
| 74 |
+
Log gesture extraction results.
|
| 75 |
+
|
| 76 |
+
Args:
|
| 77 |
+
hand_data: Hand landmark data
|
| 78 |
+
gesture_description: Generated gesture description
|
| 79 |
+
|
| 80 |
+
Returns:
|
| 81 |
+
Log entry ID
|
| 82 |
+
"""
|
| 83 |
+
log_entry = {
|
| 84 |
+
"id": f"gest_ext_{int(time.time() * 1000)}",
|
| 85 |
+
"timestamp": datetime.now().isoformat(),
|
| 86 |
+
"session_id": self.session_id,
|
| 87 |
+
"stage": "gesture_extraction",
|
| 88 |
+
"hand_label": hand_data.get('label', 'Unknown'),
|
| 89 |
+
"hand_confidence": hand_data.get('confidence', 0.0),
|
| 90 |
+
"gesture_description": gesture_description,
|
| 91 |
+
"description_length": len(gesture_description),
|
| 92 |
+
"success": len(gesture_description) > 0
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
self.logs.append(log_entry)
|
| 96 |
+
|
| 97 |
+
if self.debug:
|
| 98 |
+
print(f"✅ Gesture Extraction: {len(gesture_description)} chars description")
|
| 99 |
+
print(f" Hand: {hand_data.get('label', 'Unknown')} ({hand_data.get('confidence', 0):.1%})")
|
| 100 |
+
|
| 101 |
+
return log_entry["id"]
|
| 102 |
+
|
| 103 |
+
def log_ai_classification(self, gesture_description: str, ai_provider: str,
|
| 104 |
+
response: Dict[str, Any], success: bool,
|
| 105 |
+
error_message: str = None) -> str:
|
| 106 |
+
"""
|
| 107 |
+
Log AI classification attempts.
|
| 108 |
+
|
| 109 |
+
Args:
|
| 110 |
+
gesture_description: Input gesture description
|
| 111 |
+
ai_provider: AI provider used (gemini, openai, etc.)
|
| 112 |
+
response: AI response data
|
| 113 |
+
success: Whether the classification succeeded
|
| 114 |
+
error_message: Error message if failed
|
| 115 |
+
|
| 116 |
+
Returns:
|
| 117 |
+
Log entry ID
|
| 118 |
+
"""
|
| 119 |
+
log_entry = {
|
| 120 |
+
"id": f"ai_class_{int(time.time() * 1000)}",
|
| 121 |
+
"timestamp": datetime.now().isoformat(),
|
| 122 |
+
"session_id": self.session_id,
|
| 123 |
+
"stage": "ai_classification",
|
| 124 |
+
"ai_provider": ai_provider,
|
| 125 |
+
"input_description": gesture_description,
|
| 126 |
+
"response": response,
|
| 127 |
+
"success": success,
|
| 128 |
+
"error_message": error_message,
|
| 129 |
+
"prediction": response.get('word') or response.get('letter') if success else None,
|
| 130 |
+
"confidence": response.get('confidence', 0.0) if success else 0.0
|
| 131 |
+
}
|
| 132 |
+
|
| 133 |
+
self.logs.append(log_entry)
|
| 134 |
+
|
| 135 |
+
if self.debug:
|
| 136 |
+
status = "✅" if success else "❌"
|
| 137 |
+
if success:
|
| 138 |
+
prediction = response.get('word') or response.get('letter') or 'No prediction'
|
| 139 |
+
confidence = response.get('confidence', 0.0)
|
| 140 |
+
print(f"{status} AI Classification ({ai_provider}): {prediction} ({confidence:.1%})")
|
| 141 |
+
else:
|
| 142 |
+
print(f"{status} AI Classification ({ai_provider}) Failed: {error_message}")
|
| 143 |
+
|
| 144 |
+
return log_entry["id"]
|
| 145 |
+
|
| 146 |
+
def log_fallback_classification(self, gesture_description: str,
|
| 147 |
+
response: Dict[str, Any], success: bool) -> str:
|
| 148 |
+
"""
|
| 149 |
+
Log fallback classification results.
|
| 150 |
+
|
| 151 |
+
Args:
|
| 152 |
+
gesture_description: Input gesture description
|
| 153 |
+
response: Fallback classifier response
|
| 154 |
+
success: Whether the classification succeeded
|
| 155 |
+
|
| 156 |
+
Returns:
|
| 157 |
+
Log entry ID
|
| 158 |
+
"""
|
| 159 |
+
log_entry = {
|
| 160 |
+
"id": f"fallback_{int(time.time() * 1000)}",
|
| 161 |
+
"timestamp": datetime.now().isoformat(),
|
| 162 |
+
"session_id": self.session_id,
|
| 163 |
+
"stage": "fallback_classification",
|
| 164 |
+
"input_description": gesture_description,
|
| 165 |
+
"response": response,
|
| 166 |
+
"success": success,
|
| 167 |
+
"prediction": response.get('word') or response.get('letter') if success else None,
|
| 168 |
+
"confidence": response.get('confidence', 0.0) if success else 0.0
|
| 169 |
+
}
|
| 170 |
+
|
| 171 |
+
self.logs.append(log_entry)
|
| 172 |
+
|
| 173 |
+
if self.debug:
|
| 174 |
+
status = "✅" if success else "❌"
|
| 175 |
+
if success:
|
| 176 |
+
prediction = response.get('word') or response.get('letter') or 'No prediction'
|
| 177 |
+
confidence = response.get('confidence', 0.0)
|
| 178 |
+
print(f"{status} Fallback Classification: {prediction} ({confidence:.1%})")
|
| 179 |
+
else:
|
| 180 |
+
print(f"{status} Fallback Classification Failed")
|
| 181 |
+
|
| 182 |
+
return log_entry["id"]
|
| 183 |
+
|
| 184 |
+
def log_final_prediction(self, file_path: str, final_prediction: str,
|
| 185 |
+
confidence: float, method_used: str,
|
| 186 |
+
processing_time: float) -> str:
|
| 187 |
+
"""
|
| 188 |
+
Log final prediction results.
|
| 189 |
+
|
| 190 |
+
Args:
|
| 191 |
+
file_path: Path to the processed file
|
| 192 |
+
final_prediction: Final prediction result
|
| 193 |
+
confidence: Prediction confidence
|
| 194 |
+
method_used: Method that provided the final prediction
|
| 195 |
+
processing_time: Total processing time in seconds
|
| 196 |
+
|
| 197 |
+
Returns:
|
| 198 |
+
Log entry ID
|
| 199 |
+
"""
|
| 200 |
+
log_entry = {
|
| 201 |
+
"id": f"final_{int(time.time() * 1000)}",
|
| 202 |
+
"timestamp": datetime.now().isoformat(),
|
| 203 |
+
"session_id": self.session_id,
|
| 204 |
+
"stage": "final_prediction",
|
| 205 |
+
"file_path": file_path,
|
| 206 |
+
"final_prediction": final_prediction,
|
| 207 |
+
"confidence": confidence,
|
| 208 |
+
"method_used": method_used,
|
| 209 |
+
"processing_time": processing_time,
|
| 210 |
+
"success": final_prediction is not None and final_prediction != "No prediction"
|
| 211 |
+
}
|
| 212 |
+
|
| 213 |
+
self.logs.append(log_entry)
|
| 214 |
+
|
| 215 |
+
if self.debug:
|
| 216 |
+
status = "🎯" if log_entry["success"] else "❌"
|
| 217 |
+
print(f"{status} Final Prediction: {final_prediction} ({confidence:.1%}) via {method_used}")
|
| 218 |
+
print(f" Processing time: {processing_time:.2f}s")
|
| 219 |
+
|
| 220 |
+
return log_entry["id"]
|
| 221 |
+
|
| 222 |
+
def get_session_summary(self) -> Dict[str, Any]:
|
| 223 |
+
"""
|
| 224 |
+
Get a summary of the current session.
|
| 225 |
+
|
| 226 |
+
Returns:
|
| 227 |
+
Session summary statistics
|
| 228 |
+
"""
|
| 229 |
+
total_predictions = len([log for log in self.logs if log["stage"] == "final_prediction"])
|
| 230 |
+
successful_predictions = len([log for log in self.logs
|
| 231 |
+
if log["stage"] == "final_prediction" and log["success"]])
|
| 232 |
+
|
| 233 |
+
hand_detections = len([log for log in self.logs if log["stage"] == "hand_detection"])
|
| 234 |
+
successful_hand_detections = len([log for log in self.logs
|
| 235 |
+
if log["stage"] == "hand_detection" and log["success"]])
|
| 236 |
+
|
| 237 |
+
ai_attempts = len([log for log in self.logs if log["stage"] == "ai_classification"])
|
| 238 |
+
successful_ai = len([log for log in self.logs
|
| 239 |
+
if log["stage"] == "ai_classification" and log["success"]])
|
| 240 |
+
|
| 241 |
+
fallback_attempts = len([log for log in self.logs if log["stage"] == "fallback_classification"])
|
| 242 |
+
|
| 243 |
+
summary = {
|
| 244 |
+
"session_id": self.session_id,
|
| 245 |
+
"total_files_processed": total_predictions,
|
| 246 |
+
"successful_predictions": successful_predictions,
|
| 247 |
+
"prediction_success_rate": successful_predictions / total_predictions if total_predictions > 0 else 0,
|
| 248 |
+
"hand_detection_success_rate": successful_hand_detections / hand_detections if hand_detections > 0 else 0,
|
| 249 |
+
"ai_classification_success_rate": successful_ai / ai_attempts if ai_attempts > 0 else 0,
|
| 250 |
+
"fallback_usage_rate": fallback_attempts / total_predictions if total_predictions > 0 else 0,
|
| 251 |
+
"total_logs": len(self.logs)
|
| 252 |
+
}
|
| 253 |
+
|
| 254 |
+
return summary
|
| 255 |
+
|
| 256 |
+
def save_logs(self) -> bool:
    """
    Save logs to file.

    Writes the session summary plus every raw log entry to
    ``self.log_file`` as indented JSON.

    Returns:
        True if successful, False otherwise (failures are reported on
        stdout only when ``self.debug`` is set; they never raise).
    """
    try:
        # Explicit utf-8: the default locale encoding (e.g. cp1252 on
        # Windows) can fail on non-ASCII text in log entries.
        with open(self.log_file, 'w', encoding='utf-8') as f:
            json.dump({
                "session_summary": self.get_session_summary(),
                "logs": self.logs
            }, f, indent=2)

        if self.debug:
            print(f"💾 Logs saved to {self.log_file}")

        return True
    except Exception as e:
        # Best-effort persistence: report and signal failure, don't crash.
        if self.debug:
            print(f"❌ Failed to save logs: {e}")
        return False
|
| 278 |
+
|
| 279 |
+
def print_summary(self):
    """Print a formatted summary of the current session to stdout."""
    stats = self.get_session_summary()
    divider = "=" * 50

    # Assemble every output line first, then emit them in order.
    report = [
        "\n" + divider,
        "📊 PREDICTION SESSION SUMMARY",
        divider,
        f"Session ID: {stats['session_id']}",
        f"Files Processed: {stats['total_files_processed']}",
        f"Successful Predictions: {stats['successful_predictions']}",
        f"Prediction Success Rate: {stats['prediction_success_rate']:.1%}",
        f"Hand Detection Success Rate: {stats['hand_detection_success_rate']:.1%}",
        f"AI Classification Success Rate: {stats['ai_classification_success_rate']:.1%}",
        f"Fallback Usage Rate: {stats['fallback_usage_rate']:.1%}",
        f"Total Log Entries: {stats['total_logs']}",
        divider,
    ]
    for line in report:
        print(line)
|
src/src/visualization_utils.py
ADDED
|
@@ -0,0 +1,359 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Visualization utilities for enhanced result display
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import cv2
|
| 6 |
+
import numpy as np
|
| 7 |
+
import matplotlib.pyplot as plt
|
| 8 |
+
import matplotlib.patches as patches
|
| 9 |
+
from matplotlib.patches import Circle
|
| 10 |
+
import plotly.graph_objects as go
|
| 11 |
+
import plotly.express as px
|
| 12 |
+
from plotly.subplots import make_subplots
|
| 13 |
+
from typing import List, Dict, Any, Tuple
|
| 14 |
+
import pandas as pd
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class HandLandmarkVisualizer:
    """
    Enhanced visualization for hand landmarks and gesture analysis.

    Renders 21-point MediaPipe-style hand landmarks onto images (OpenCV),
    and builds interactive Plotly figures: a 3D landmark plot, a gesture
    feature radar chart, and a confidence gauge.
    """

    def __init__(self):
        """Initialize the connection topology, colors, and finger index ranges."""
        # MediaPipe hand landmark connections (pairs of landmark indices,
        # 0 = wrist, 1-4 thumb, 5-8 index, 9-12 middle, 13-16 ring, 17-20 pinky)
        self.hand_connections = [
            (0, 1), (1, 2), (2, 3), (3, 4),        # Thumb
            (0, 5), (5, 6), (6, 7), (7, 8),        # Index finger
            (0, 9), (9, 10), (10, 11), (11, 12),   # Middle finger
            (0, 13), (13, 14), (14, 15), (15, 16), # Ring finger
            (0, 17), (17, 18), (18, 19), (19, 20), # Pinky
            (5, 9), (9, 13), (13, 17)              # Palm connections
        ]

        # Color scheme for different parts.
        # NOTE(review): these triples are passed straight to cv2, which uses
        # BGR ordering — so (255, 0, 0) renders blue on screen, not red as
        # the labels suggest. Confirm intended channel order.
        self.colors = {
            'thumb': (255, 0, 0),     # Red
            'index': (0, 255, 0),     # Green
            'middle': (0, 0, 255),    # Blue
            'ring': (255, 255, 0),    # Yellow
            'pinky': (255, 0, 255),   # Magenta
            'palm': (0, 255, 255),    # Cyan
            'wrist': (128, 128, 128)  # Gray
        }

        # Finger landmark ranges: maps each finger name to the landmark
        # indices belonging to it (used for per-finger coloring).
        self.finger_ranges = {
            'thumb': range(1, 5),
            'index': range(5, 9),
            'middle': range(9, 13),
            'ring': range(13, 17),
            'pinky': range(17, 21),
            'wrist': [0]
        }

    def draw_enhanced_landmarks(self, image: np.ndarray,
                                hand_landmarks: List[Dict[str, Any]]) -> np.ndarray:
        """
        Draw enhanced hand landmarks with color coding and connections.

        Args:
            image: Input image (H x W x C array; not modified — a copy is drawn on)
            hand_landmarks: List of hand landmark data; each entry carries
                'landmarks' (21 dicts with normalized 'x'/'y' in [0, 1])
                and a 'label' string (e.g. "Left"/"Right")

        Returns:
            Image copy with per-finger colored connections, landmark dots,
            and a per-hand text label drawn on it
        """
        annotated_image = image.copy()
        height, width = image.shape[:2]

        for hand_data in hand_landmarks:
            landmarks = hand_data['landmarks']
            hand_label = hand_data['label']

            # Convert normalized coordinates to pixel coordinates
            landmark_points = []
            for landmark in landmarks:
                x = int(landmark['x'] * width)
                y = int(landmark['y'] * height)
                landmark_points.append((x, y))

            # Draw connections
            for connection in self.hand_connections:
                start_idx, end_idx = connection
                start_point = landmark_points[start_idx]
                end_point = landmark_points[end_idx]

                # Determine color based on finger
                color = self._get_connection_color(start_idx, end_idx)
                cv2.line(annotated_image, start_point, end_point, color, 2)

            # Draw landmark points: filled colored dot plus a thin white ring
            for i, point in enumerate(landmark_points):
                color = self._get_landmark_color(i)
                cv2.circle(annotated_image, point, 4, color, -1)
                cv2.circle(annotated_image, point, 6, (255, 255, 255), 1)

            # Add hand label near the wrist (landmark 0), drawn twice
            # (thick white under thin black) for an outline effect
            if landmark_points:
                label_pos = (landmark_points[0][0] - 50, landmark_points[0][1] - 20)
                cv2.putText(annotated_image, f"{hand_label} Hand", label_pos,
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
                cv2.putText(annotated_image, f"{hand_label} Hand", label_pos,
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 1)

        return annotated_image

    def _get_landmark_color(self, landmark_idx: int) -> Tuple[int, int, int]:
        """Get the display color for a specific landmark index (gray if unknown)."""
        for finger, indices in self.finger_ranges.items():
            if landmark_idx in indices:
                return self.colors[finger]
        return (128, 128, 128)  # Default gray

    def _get_connection_color(self, start_idx: int, end_idx: int) -> Tuple[int, int, int]:
        """Get color for a connection between landmarks."""
        # Use the color of the finger that both landmarks belong to;
        # cross-finger connections (e.g. wrist-to-finger) fall back to palm
        for finger, indices in self.finger_ranges.items():
            if start_idx in indices and end_idx in indices:
                return self.colors[finger]
        return self.colors['palm']  # Default to palm color

    def create_3d_hand_plot(self, hand_landmarks: Dict[str, Any]) -> go.Figure:
        """
        Create a 3D visualization of hand landmarks.

        Args:
            hand_landmarks: Hand landmark data with 'landmarks' (dicts
                carrying 'x'/'y'/'z') and a 'label' string

        Returns:
            Plotly 3D figure: depth-colored landmark markers plus gray
            connection segments
        """
        landmarks = hand_landmarks['landmarks']

        # Extract coordinates
        x_coords = [landmark['x'] for landmark in landmarks]
        y_coords = [-landmark['y'] for landmark in landmarks]  # Flip Y for proper orientation
        z_coords = [landmark['z'] for landmark in landmarks]

        # Create 3D scatter plot
        fig = go.Figure()

        # Add landmark points
        fig.add_trace(go.Scatter3d(
            x=x_coords,
            y=y_coords,
            z=z_coords,
            mode='markers',
            marker=dict(
                size=8,
                color=z_coords,
                colorscale='Viridis',
                showscale=True,
                colorbar=dict(title="Depth")
            ),
            text=[f"Landmark {i}" for i in range(len(landmarks))],
            name="Hand Landmarks"
        ))

        # Add connections (one trace per segment, hidden from the legend)
        for connection in self.hand_connections:
            start_idx, end_idx = connection
            fig.add_trace(go.Scatter3d(
                x=[x_coords[start_idx], x_coords[end_idx]],
                y=[y_coords[start_idx], y_coords[end_idx]],
                z=[z_coords[start_idx], z_coords[end_idx]],
                mode='lines',
                line=dict(color='rgba(100, 100, 100, 0.6)', width=3),
                showlegend=False
            ))

        # Update layout
        fig.update_layout(
            title=f"3D Hand Landmarks - {hand_landmarks['label']} Hand",
            scene=dict(
                xaxis_title="X",
                yaxis_title="Y",
                zaxis_title="Z (Depth)",
                camera=dict(
                    eye=dict(x=1.5, y=1.5, z=1.5)
                )
            ),
            width=600,
            height=500
        )

        return fig

    def create_gesture_feature_radar(self, gesture_features: Dict[str, float]) -> go.Figure:
        """
        Create a radar chart for gesture features.

        Args:
            gesture_features: Dictionary of gesture features; missing keys
                default to 0. Angle features are assumed to be in degrees
                (divided by 180) — TODO confirm against the extractor.

        Returns:
            Plotly radar chart figure with all axes normalized to [0, 1]
        """
        # Normalize features for radar chart
        features = ['Thumb Ext.', 'Index Ext.', 'Middle Ext.', 'Ring Ext.', 'Pinky Ext.',
                    'Thumb-Index Angle', 'Palm Orientation', 'Hand Openness']

        # Extract and normalize values
        values = [
            gesture_features.get('thumb_extended', 0),
            gesture_features.get('index_extended', 0),
            gesture_features.get('middle_extended', 0),
            gesture_features.get('ring_extended', 0),
            gesture_features.get('pinky_extended', 0),
            gesture_features.get('thumb_index_angle', 0) / 180,  # Normalize angle
            gesture_features.get('palm_orientation', 0) / 180,   # Normalize angle
            gesture_features.get('hand_openness', 0)
        ]

        fig = go.Figure()

        fig.add_trace(go.Scatterpolar(
            r=values,
            theta=features,
            fill='toself',
            name='Gesture Features',
            line_color='rgb(46, 134, 171)'
        ))

        fig.update_layout(
            polar=dict(
                radialaxis=dict(
                    visible=True,
                    range=[0, 1]
                )
            ),
            title="Gesture Feature Analysis",
            showlegend=True
        )

        return fig

    def create_confidence_gauge(self, confidence: float, title: str = "Confidence") -> go.Figure:
        """
        Create a gauge chart for confidence scores.

        Args:
            confidence: Confidence value (0-1); displayed as a percentage
            title: Title for the gauge

        Returns:
            Plotly gauge figure (delta reference at 80%, red threshold at 90%)
        """
        fig = go.Figure(go.Indicator(
            mode="gauge+number+delta",
            value=confidence * 100,
            domain={'x': [0, 1], 'y': [0, 1]},
            title={'text': title},
            delta={'reference': 80},
            gauge={
                'axis': {'range': [None, 100]},
                'bar': {'color': "darkblue"},
                'steps': [
                    {'range': [0, 50], 'color': "lightgray"},
                    {'range': [50, 80], 'color': "gray"}
                ],
                'threshold': {
                    'line': {'color': "red", 'width': 4},
                    'thickness': 0.75,
                    'value': 90
                }
            }
        ))

        fig.update_layout(height=300)
        return fig
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
def create_comparison_view(original_image: np.ndarray,
                           annotated_image: np.ndarray) -> np.ndarray:
    """
    Build a side-by-side comparison of the original and annotated images.

    Args:
        original_image: Original input image
        annotated_image: Image with landmarks drawn

    Returns:
        Single image with both panels stacked horizontally, labeled
        "Original" (left) and "Detected" (right)
    """
    # Both panels must share a height before they can be stacked.
    target_height = max(original_image.shape[0], annotated_image.shape[0])

    def _fit_height(img: np.ndarray) -> np.ndarray:
        # Scale width proportionally so the aspect ratio is preserved.
        if img.shape[0] == target_height:
            return img
        aspect_ratio = img.shape[1] / img.shape[0]
        new_width = int(target_height * aspect_ratio)
        return cv2.resize(img, (new_width, target_height))

    original_image = _fit_height(original_image)
    annotated_image = _fit_height(annotated_image)

    comparison = np.hstack([original_image, annotated_image])

    # Caption each panel; the right-hand label is offset by the left
    # panel's width so it lands over the annotated image.
    cv2.putText(comparison, "Original", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
    cv2.putText(comparison, "Detected", (original_image.shape[1] + 10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

    return comparison
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
def create_processing_timeline(frame_detections: List[Dict[str, Any]]) -> go.Figure:
    """
    Create a timeline visualization for video processing results.

    Args:
        frame_detections: Per-frame detection records, each carrying
            'timestamp', 'hands_detected' and 'frame_number'

    Returns:
        Plotly scatter figure of hand count over time (an empty figure
        when there are no detections)
    """
    # Nothing to plot — return a blank canvas rather than failing.
    if not frame_detections:
        return go.Figure()

    # Pull each series out of the records.
    times = [rec['timestamp'] for rec in frame_detections]
    counts = [rec['hands_detected'] for rec in frame_detections]
    labels = [f"Frame {rec['frame_number']}" for rec in frame_detections]

    hover = ("<b>Frame %{text}</b><br>"
             "Time: %{x:.1f}s<br>"
             "Hands: %{y}<br>"
             "<extra></extra>")

    # Markers are colored by the number of hands seen in that frame.
    timeline = go.Figure(go.Scatter(
        x=times,
        y=counts,
        mode='markers+lines',
        name='Hands Detected',
        marker=dict(
            size=8,
            color=counts,
            colorscale='Viridis',
            showscale=True,
            colorbar=dict(title="Hands")
        ),
        text=labels,
        hovertemplate=hover
    ))

    timeline.update_layout(
        title="Hand Detection Timeline",
        xaxis_title="Time (seconds)",
        yaxis_title="Number of Hands Detected",
        hovermode='closest'
    )

    return timeline
|