Upload 24 files
Browse files- src/src/__init__.py +1 -0
- src/src/__pycache__/__init__.cpython-310.pyc +0 -0
- src/src/__pycache__/camera_handler.cpython-310.pyc +0 -0
- src/src/__pycache__/export_utils.cpython-310.pyc +0 -0
- src/src/__pycache__/fallback_classifier.cpython-310.pyc +0 -0
- src/src/__pycache__/file_handler.cpython-310.pyc +0 -0
- src/src/__pycache__/gemini_classifier.cpython-310.pyc +0 -0
- src/src/__pycache__/gesture_extractor.cpython-310.pyc +0 -0
- src/src/__pycache__/hand_detector.cpython-310.pyc +0 -0
- src/src/__pycache__/openai_classifier.cpython-310.pyc +0 -0
- src/src/__pycache__/output_handler.cpython-310.pyc +0 -0
- src/src/__pycache__/prediction_logger.cpython-310.pyc +0 -0
- src/src/__pycache__/visualization_utils.cpython-310.pyc +0 -0
- src/src/camera_handler.py +306 -0
- src/src/export_utils.py +418 -0
- src/src/fallback_classifier.py +303 -0
- src/src/file_handler.py +543 -0
- src/src/gemini_classifier.py +420 -0
- src/src/gesture_extractor.py +270 -0
- src/src/hand_detector.py +196 -0
- src/src/openai_classifier.py +392 -0
- src/src/output_handler.py +391 -0
- src/src/prediction_logger.py +294 -0
- src/src/visualization_utils.py +359 -0
src/src/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Sign Language Detector Package
|
src/src/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (163 Bytes). View file
|
|
|
src/src/__pycache__/camera_handler.cpython-310.pyc
ADDED
|
Binary file (8.47 kB). View file
|
|
|
src/src/__pycache__/export_utils.cpython-310.pyc
ADDED
|
Binary file (10.3 kB). View file
|
|
|
src/src/__pycache__/fallback_classifier.cpython-310.pyc
ADDED
|
Binary file (7.43 kB). View file
|
|
|
src/src/__pycache__/file_handler.cpython-310.pyc
ADDED
|
Binary file (13.6 kB). View file
|
|
|
src/src/__pycache__/gemini_classifier.cpython-310.pyc
ADDED
|
Binary file (10.4 kB). View file
|
|
|
src/src/__pycache__/gesture_extractor.cpython-310.pyc
ADDED
|
Binary file (7.4 kB). View file
|
|
|
src/src/__pycache__/hand_detector.cpython-310.pyc
ADDED
|
Binary file (5.89 kB). View file
|
|
|
src/src/__pycache__/openai_classifier.cpython-310.pyc
ADDED
|
Binary file (11 kB). View file
|
|
|
src/src/__pycache__/output_handler.cpython-310.pyc
ADDED
|
Binary file (10.7 kB). View file
|
|
|
src/src/__pycache__/prediction_logger.cpython-310.pyc
ADDED
|
Binary file (9.71 kB). View file
|
|
|
src/src/__pycache__/visualization_utils.cpython-310.pyc
ADDED
|
Binary file (9.67 kB). View file
|
|
|
src/src/camera_handler.py
ADDED
|
@@ -0,0 +1,306 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Real-time Camera Input Handler for Sign Language Detection
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import cv2
|
| 6 |
+
import numpy as np
|
| 7 |
+
import time
|
| 8 |
+
import threading
|
| 9 |
+
from typing import Optional, Callable, Dict, Any, List
|
| 10 |
+
from queue import Queue, Empty
|
| 11 |
+
|
| 12 |
+
from .hand_detector import HandDetector
|
| 13 |
+
from .gesture_extractor import GestureExtractor
|
| 14 |
+
from .openai_classifier import SignLanguageClassifier
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class CameraHandler:
|
| 18 |
+
"""
|
| 19 |
+
Handles real-time camera input for sign language detection.
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
def __init__(self,
             camera_index: int = 0,
             frame_width: int = 640,
             frame_height: int = 480,
             fps: int = 30,
             detection_interval: float = 2.0):
    """Set up camera parameters, processing components, and threading state.

    Args:
        camera_index: Index of the camera device to open.
        frame_width: Requested capture frame width in pixels.
        frame_height: Requested capture frame height in pixels.
        fps: Requested capture frame rate.
        detection_interval: Minimum seconds between gesture classifications.
    """
    # Camera configuration
    self.camera_index = camera_index
    self.frame_width = frame_width
    self.frame_height = frame_height
    self.fps = fps
    self.detection_interval = detection_interval

    # Processing components; the classifier is created lazily via
    # initialize_classifier() because it may need an API key.
    self.hand_detector = HandDetector()
    self.gesture_extractor = GestureExtractor()
    self.classifier = None

    # Capture device handle and worker threads (created in start_capture)
    self.cap = None
    self.is_running = False
    self.capture_thread = None
    self.detection_thread = None

    # Bounded queues decouple the capture thread from classification
    self.frame_queue = Queue(maxsize=10)
    self.detection_queue = Queue(maxsize=5)

    # Optional user callbacks (see set_callbacks)
    self.on_frame_callback = None
    self.on_detection_callback = None

    # Throttling state and a rolling record of recent classifications
    self.last_detection_time = 0
    self.gesture_history = []
    self.max_history_length = 10
| 67 |
+
|
| 68 |
+
def initialize_camera(self) -> bool:
    """Open the camera device and apply the configured properties.

    Returns:
        True if the camera opened successfully, False otherwise.
    """
    try:
        self.cap = cv2.VideoCapture(self.camera_index)
        if not self.cap.isOpened():
            print(f"Error: Could not open camera {self.camera_index}")
            return False

        # Request the configured resolution and frame rate; OpenCV treats
        # these as hints, the driver may pick the nearest supported values.
        for prop, value in ((cv2.CAP_PROP_FRAME_WIDTH, self.frame_width),
                            (cv2.CAP_PROP_FRAME_HEIGHT, self.frame_height),
                            (cv2.CAP_PROP_FPS, self.fps)):
            self.cap.set(prop, value)

        print(f"Camera initialized: {self.frame_width}x{self.frame_height} @ {self.fps}fps")
        return True

    except Exception as e:
        print(f"Error initializing camera: {e}")
        return False
| 93 |
+
|
| 94 |
+
def initialize_classifier(self, api_key: Optional[str] = None) -> bool:
    """Create the OpenAI-backed gesture classifier.

    Args:
        api_key: OpenAI API key; when None the classifier falls back to its
            own key resolution (presumably an environment variable — confirm
            in SignLanguageClassifier).

    Returns:
        True if the classifier was created, False otherwise.
    """
    try:
        self.classifier = SignLanguageClassifier(api_key=api_key)
        print("OpenAI classifier initialized")
        return True
    except Exception as e:
        print(f"Error initializing classifier: {e}")
        return False
| 111 |
+
|
| 112 |
+
def set_callbacks(self,
                  on_frame: Optional[Callable] = None,
                  on_detection: Optional[Callable] = None):
    """Register observers for frame and detection events.

    Args:
        on_frame: Called with (annotated_frame, hand_landmarks) for every
            captured frame.
        on_detection: Called with a list of detection dicts whenever
            gestures are classified.
    """
    self.on_frame_callback = on_frame
    self.on_detection_callback = on_detection
| 124 |
+
|
| 125 |
+
def start_capture(self) -> bool:
    """Spawn the capture and detection worker threads.

    Returns:
        True if both threads were started, False if the camera is not ready.
    """
    if not (self.cap and self.cap.isOpened()):
        print("Camera not initialized")
        return False

    self.is_running = True

    # Daemon threads so the interpreter can exit even if they are alive.
    self.capture_thread = threading.Thread(target=self._capture_loop, daemon=True)
    self.detection_thread = threading.Thread(target=self._detection_loop, daemon=True)
    self.capture_thread.start()
    self.detection_thread.start()

    print("Camera capture started")
    return True
| 148 |
+
|
| 149 |
+
def stop_capture(self):
    """Stop the worker threads and release the camera.

    Signals both loops to exit, waits a bounded time for each, then
    releases the capture device. The handle is cleared afterwards so later
    calls (start_capture, capture_single_frame) fail cleanly instead of
    operating on a released VideoCapture object.
    """
    self.is_running = False

    # Bounded joins: don't hang shutdown if a worker is stuck in a read.
    if self.capture_thread:
        self.capture_thread.join(timeout=2.0)
    if self.detection_thread:
        self.detection_thread.join(timeout=2.0)

    if self.cap:
        self.cap.release()
        self.cap = None  # fix: don't keep a stale, released handle around

    print("Camera capture stopped")
| 163 |
+
|
| 164 |
+
def _capture_loop(self):
    """Camera capture loop (runs in a worker thread).

    Reads frames, mirrors them, runs hand detection, feeds the frame queue
    consumed by _detection_loop, and notifies the frame callback.
    """
    while self.is_running:
        ret, frame = self.cap.read()

        if not ret:
            print("Error reading frame from camera")
            # Fix: previously only this thread exited on a read failure,
            # leaving is_running True and _detection_loop polling forever.
            self.is_running = False
            break

        # Mirror the image so on-screen movement matches the user's
        frame = cv2.flip(frame, 1)

        annotated_frame, hand_landmarks = self.hand_detector.detect_hands(frame)

        # Drop frames when the classifier can't keep up (queue full);
        # real-time preview matters more than classifying every frame.
        if not self.frame_queue.full():
            self.frame_queue.put((frame.copy(), hand_landmarks))

        if self.on_frame_callback:
            self.on_frame_callback(annotated_frame, hand_landmarks)

        # Crude pacing; NOTE(review): does not subtract processing time,
        # so the effective rate is below self.fps.
        time.sleep(1.0 / self.fps)
| 189 |
+
|
| 190 |
+
def _detection_loop(self):
    """Gesture classification loop (runs in a worker thread).

    Pulls frames from the queue and classifies at most once per
    detection_interval; frames arriving sooner are simply discarded.
    """
    while self.is_running:
        try:
            frame, hand_landmarks = self.frame_queue.get(timeout=1.0)

            now = time.time()
            if now - self.last_detection_time < self.detection_interval:
                continue  # throttle: too soon since the last classification

            if hand_landmarks and self.classifier:
                self._process_gestures(hand_landmarks)
                self.last_detection_time = now

        except Empty:
            # Queue was quiet for a second; loop back and re-check is_running.
            continue
        except Exception as e:
            print(f"Error in detection loop: {e}")
| 211 |
+
|
| 212 |
+
def _process_gestures(self, hand_landmarks: List[Dict[str, Any]]):
    """Classify each detected hand and record successful results.

    Args:
        hand_landmarks: Per-hand landmark dicts from the hand detector
            (each expected to carry a 'label' key).
    """
    detections = []

    for hand_data in hand_landmarks:
        try:
            description = self.gesture_extractor.create_gesture_description(hand_data)
            classification = self.classifier.classify_gesture(description)

            if not classification['success']:
                continue

            detection = {
                'hand_label': hand_data['label'],
                'gesture_description': description,
                'classification': classification,
                'timestamp': time.time(),
            }
            detections.append(detection)

            # Keep a bounded rolling history of recent detections
            self.gesture_history.append(detection)
            del self.gesture_history[:-self.max_history_length]

        except Exception as e:
            print(f"Error processing gesture: {e}")

    if detections and self.on_detection_callback:
        self.on_detection_callback(detections)
| 249 |
+
|
| 250 |
+
def get_recent_gestures(self, count: int = 5) -> List[Dict[str, Any]]:
    """Return up to ``count`` most recent gesture detections, oldest first.

    Args:
        count: Maximum number of detections to return.

    Returns:
        The trailing slice of the gesture history ([] when empty).
    """
    # Slicing an empty list already yields [], so no emptiness guard needed.
    return self.gesture_history[-count:]
| 261 |
+
|
| 262 |
+
def classify_gesture_sequence(self, count: int = 5) -> Optional[Dict[str, Any]]:
    """Classify the most recent gestures as a sequence.

    Args:
        count: How many recent gestures to include in the sequence.

    Returns:
        The sequence classification dict, or None when there is no
        classifier, fewer than two recorded gestures, or an error occurs.
    """
    if not self.classifier or len(self.gesture_history) < 2:
        return None

    descriptions = [g['gesture_description']
                    for g in self.get_recent_gestures(count)]

    try:
        return self.classifier.classify_sequence(descriptions)
    except Exception as e:
        print(f"Error classifying gesture sequence: {e}")
        return None
| 283 |
+
|
| 284 |
+
def capture_single_frame(self) -> Optional[np.ndarray]:
    """Grab one mirrored frame from the camera.

    Returns:
        The horizontally flipped frame, or None if the camera is not open
        or the read fails.
    """
    if not self.cap or not self.cap.isOpened():
        return None

    ok, frame = self.cap.read()
    if not ok:
        return None
    return cv2.flip(frame, 1)  # mirror to match the live preview
| 298 |
+
|
| 299 |
+
def cleanup(self):
    """Release all resources: worker threads, camera, detector, windows."""
    self.stop_capture()

    if self.hand_detector:
        self.hand_detector.cleanup()

    # Close any OpenCV preview windows this process opened.
    cv2.destroyAllWindows()
|
src/src/export_utils.py
ADDED
|
@@ -0,0 +1,418 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Export utilities for sign language detection results
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import json
|
| 6 |
+
import csv
|
| 7 |
+
import os
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
from typing import List, Dict, Any, Optional
|
| 10 |
+
import pandas as pd
|
| 11 |
+
from reportlab.lib.pagesizes import letter, A4
|
| 12 |
+
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image as RLImage
|
| 13 |
+
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
| 14 |
+
from reportlab.lib.units import inch
|
| 15 |
+
from reportlab.lib import colors
|
| 16 |
+
from reportlab.lib.enums import TA_CENTER, TA_LEFT
|
| 17 |
+
import tempfile
|
| 18 |
+
import cv2
|
| 19 |
+
import numpy as np
|
| 20 |
+
from PIL import Image
|
| 21 |
+
import io
|
| 22 |
+
import base64
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class ResultExporter:
|
| 26 |
+
"""
|
| 27 |
+
Export sign language detection results in various formats.
|
| 28 |
+
"""
|
| 29 |
+
|
| 30 |
+
def __init__(self):
    """Prepare the ReportLab stylesheets used when rendering PDF reports."""
    # Stock styles ('Normal', 'Title', ...) plus our derived custom ones.
    self.styles = getSampleStyleSheet()
    self.custom_styles = self._create_custom_styles()
| 34 |
+
|
| 35 |
+
def _create_custom_styles(self) -> Dict[str, ParagraphStyle]:
    """Build the named ParagraphStyles used in PDF reports.

    Returns:
        Mapping of style name -> ParagraphStyle (title, heading, subheading).
    """
    # Declarative spec keeps the three styles easy to compare at a glance.
    specs = {
        'CustomTitle': dict(parent=self.styles['Title'], fontSize=24,
                            spaceAfter=30, alignment=TA_CENTER,
                            textColor=colors.darkblue),
        'CustomHeading': dict(parent=self.styles['Heading1'], fontSize=16,
                              spaceAfter=12, spaceBefore=20,
                              textColor=colors.darkblue),
        'CustomSubheading': dict(parent=self.styles['Heading2'], fontSize=14,
                                 spaceAfter=8, spaceBefore=12,
                                 textColor=colors.darkgreen),
    }
    return {name: ParagraphStyle(name, **opts) for name, opts in specs.items()}
| 70 |
+
|
| 71 |
+
def export_to_json(self, results: List[Dict[str, Any]],
                   output_path: str,
                   include_metadata: bool = True) -> bool:
    """Write processing results to a JSON file.

    Args:
        results: Per-file processing result dicts.
        output_path: Destination path for the JSON document.
        include_metadata: When False, image payloads are stripped from each
            result to keep the file small.

    Returns:
        True on success, False on any error.
    """
    image_keys = ('annotated_image', 'enhanced_image',
                  'comparison_image', 'original_image')
    try:
        cleaned = []
        for result in results:
            entry = self._clean_result_for_export(result)
            if not include_metadata:
                # Drop the bulky (base64-encoded) image fields.
                for key in image_keys:
                    entry.pop(key, None)
            cleaned.append(entry)

        payload = {
            'export_timestamp': datetime.now().isoformat(),
            'total_files': len(results),
            'successful_files': sum(1 for r in results if r.get('success', False)),
            'results': cleaned,
        }

        with open(output_path, 'w', encoding='utf-8') as f:
            # default=str is a deliberate catch-all for leftover
            # non-serializable values (e.g. datetimes).
            json.dump(payload, f, indent=2, default=str, ensure_ascii=False)
        return True

    except Exception as e:
        print(f"Error exporting to JSON: {e}")
        return False
| 114 |
+
|
| 115 |
+
def export_to_csv(self, results: List[Dict[str, Any]], output_path: str) -> bool:
    """Write one CSV row per detection (or one per file when none).

    Every row now carries the same column set so the CSV is rectangular;
    previously rows for failed or detection-less files omitted
    'gesture_description', leaving NaN-filled cells in the DataFrame.

    Args:
        results: Per-file processing result dicts.
        output_path: Destination path for the CSV file.

    Returns:
        True if at least one row was written, False otherwise (including
        on error).
    """
    def base_row(result: Dict[str, Any], success: bool, error: str = '') -> Dict[str, Any]:
        # Single template so every row shares an identical column set.
        return {
            'filename': result.get('filename', ''),
            'file_type': result.get('file_type', ''),
            'success': success,
            'error': error,
            'hands_detected': 0,
            'hand_label': '',
            'confidence': 0,
            'gesture_description': '',
            'letter': '',
            'word': '',
            'ai_confidence': 0,
        }

    try:
        csv_data = []
        for result in results:
            if not result.get('success'):
                csv_data.append(base_row(result, False, result.get('error', '')))
                continue

            detections = result.get('detections')
            if not detections:
                csv_data.append(base_row(result, True))
                continue

            for detection in detections:
                row = base_row(result, True)
                row['hands_detected'] = result.get('hands_detected', 0)
                row['hand_label'] = detection.get('hand_label', '')
                row['confidence'] = detection.get('confidence', 0)
                row['gesture_description'] = detection.get('gesture_description', '')

                classification = detection.get('classification', {})
                if classification.get('success'):
                    row['letter'] = classification.get('letter', '')
                    row['word'] = classification.get('word', '')
                    row['ai_confidence'] = classification.get('confidence', 0)

                csv_data.append(row)

        if csv_data:
            pd.DataFrame(csv_data).to_csv(output_path, index=False)
            return True
        return False

    except Exception as e:
        print(f"Error exporting to CSV: {e}")
        return False
| 200 |
+
|
| 201 |
+
def export_to_pdf(self, results: List[Dict[str, Any]],
                  output_path: str,
                  include_images: bool = True) -> bool:
    """Render a PDF report of the processing results.

    Args:
        results: Per-file processing result dicts.
        output_path: Destination path for the PDF file.
        include_images: Reserved flag; images are not currently embedded
            in the report.

    Returns:
        True on success, False on any error.
    """
    try:
        doc = SimpleDocTemplate(output_path, pagesize=A4)
        story = []

        # Report title
        story.append(Paragraph("Sign Language Detection Report",
                               self.custom_styles['CustomTitle']))
        story.append(Spacer(1, 20))

        # Overall summary block
        successful_files = sum(1 for r in results if r.get('success', False))
        total_hands = sum(r.get('hands_detected', 0)
                          for r in results if r.get('success', False))
        summary_text = f"""
        <b>Processing Summary</b><br/>
        Total Files: {len(results)}<br/>
        Successful: {successful_files}<br/>
        Total Hands Detected: {total_hands}<br/>
        Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
        """
        story.append(Paragraph(summary_text, self.styles['Normal']))
        story.append(Spacer(1, 20))

        # One section per processed file
        for i, result in enumerate(results):
            filename = result.get('filename', f'File {i+1}')
            # Fix: the header previously never interpolated `filename`
            # and printed a literal placeholder for every file.
            story.append(Paragraph(f"File: {filename}",
                                   self.custom_styles['CustomHeading']))

            if not result.get('success'):
                error_text = f"<font color='red'>Error: {result.get('error', 'Unknown error')}</font>"
                story.append(Paragraph(error_text, self.styles['Normal']))
                story.append(Spacer(1, 10))
                continue

            # Per-file property table
            file_info = [
                ['Property', 'Value'],
                ['File Type', result.get('file_type', 'Unknown')],
                ['File Size', f"{result.get('file_size', 0) / 1024:.1f} KB"],
                ['Hands Detected', str(result.get('hands_detected', 0))]
            ]
            if result.get('file_type') == 'video':
                video_props = result.get('video_properties', {})
                file_info.extend([
                    ['Duration', f"{video_props.get('duration', 0):.1f}s"],
                    ['FPS', f"{video_props.get('fps', 0):.1f}"],
                    ['Total Frames', str(video_props.get('total_frames', 0))]
                ])

            info_table = Table(file_info)
            info_table.setStyle(TableStyle([
                ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
                ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
                ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                ('FONTSIZE', (0, 0), (-1, 0), 12),
                ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
                ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
                ('GRID', (0, 0), (-1, -1), 1, colors.black)
            ]))
            story.append(info_table)
            story.append(Spacer(1, 15))

            # Per-hand detection details
            if result.get('detections'):
                story.append(Paragraph("Detection Details",
                                       self.custom_styles['CustomSubheading']))

                for j, detection in enumerate(result['detections']):
                    detection_text = f"""
                    <b>Hand {j+1}: {detection.get('hand_label', 'Unknown')}</b><br/>
                    Confidence: {detection.get('confidence', 0):.1%}<br/>
                    """
                    classification = detection.get('classification', {})
                    if classification.get('success'):
                        if classification.get('letter'):
                            detection_text += f"Letter: <b>{classification['letter']}</b><br/>"
                        if classification.get('word'):
                            detection_text += f"Word: <b>{classification['word']}</b><br/>"
                        if classification.get('confidence'):
                            detection_text += f"AI Confidence: {classification['confidence']:.1%}<br/>"

                    story.append(Paragraph(detection_text, self.styles['Normal']))
                    story.append(Spacer(1, 10))

            story.append(Spacer(1, 20))

        doc.build(story)
        return True

    except Exception as e:
        print(f"Error exporting to PDF: {e}")
        return False
| 318 |
+
|
| 319 |
+
def _clean_result_for_export(self, result: Dict[str, Any]) -> Dict[str, Any]:
    """Return a JSON-serializable copy of a result dict.

    Known image arrays become base64 PNG data URIs, any other numpy array
    becomes a plain list, and every other value passes through unchanged.

    Args:
        result: Raw result dict, possibly containing numpy arrays.

    Returns:
        A new dict safe to pass to json.dump.
    """
    image_keys = {'annotated_image', 'enhanced_image',
                  'comparison_image', 'original_image'}
    clean_result = {}

    for key, value in result.items():
        if not isinstance(value, np.ndarray):
            # lists/dicts and scalars need no conversion (previously two
            # redundant branches did the same pass-through).
            clean_result[key] = value
            continue

        if key not in image_keys:
            clean_result[key] = value.tolist()
            continue

        try:
            # OpenCV images are BGR; convert so the PNG colors are correct.
            if len(value.shape) == 3:
                pil_image = Image.fromarray(cv2.cvtColor(value, cv2.COLOR_BGR2RGB))
            else:
                pil_image = Image.fromarray(value)

            buffer = io.BytesIO()
            pil_image.save(buffer, format='PNG')
            encoded = base64.b64encode(buffer.getvalue()).decode()
            clean_result[key] = f"data:image/png;base64,{encoded}"
        except Exception:
            # Fix: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit.
            clean_result[key] = None

    return clean_result
| 358 |
+
|
| 359 |
+
def create_summary_report(self, results: List[Dict[str, Any]]) -> Dict[str, Any]:
|
| 360 |
+
"""
|
| 361 |
+
Create a summary report of the processing results.
|
| 362 |
+
|
| 363 |
+
Args:
|
| 364 |
+
results: List of processing results
|
| 365 |
+
|
| 366 |
+
Returns:
|
| 367 |
+
Summary report dictionary
|
| 368 |
+
"""
|
| 369 |
+
summary = {
|
| 370 |
+
'total_files': len(results),
|
| 371 |
+
'successful_files': 0,
|
| 372 |
+
'failed_files': 0,
|
| 373 |
+
'total_hands_detected': 0,
|
| 374 |
+
'file_types': {},
|
| 375 |
+
'detected_letters': {},
|
| 376 |
+
'detected_words': {},
|
| 377 |
+
'average_confidence': 0,
|
| 378 |
+
'processing_errors': []
|
| 379 |
+
}
|
| 380 |
+
|
| 381 |
+
confidences = []
|
| 382 |
+
|
| 383 |
+
for result in results:
|
| 384 |
+
if result.get('success'):
|
| 385 |
+
summary['successful_files'] += 1
|
| 386 |
+
summary['total_hands_detected'] += result.get('hands_detected', 0)
|
| 387 |
+
|
| 388 |
+
# File type statistics
|
| 389 |
+
file_type = result.get('file_type', 'unknown')
|
| 390 |
+
summary['file_types'][file_type] = summary['file_types'].get(file_type, 0) + 1
|
| 391 |
+
|
| 392 |
+
# Process detections
|
| 393 |
+
for detection in result.get('detections', []):
|
| 394 |
+
if 'confidence' in detection:
|
| 395 |
+
confidences.append(detection['confidence'])
|
| 396 |
+
|
| 397 |
+
if 'classification' in detection and detection['classification'].get('success'):
|
| 398 |
+
classification = detection['classification']
|
| 399 |
+
|
| 400 |
+
if classification.get('letter'):
|
| 401 |
+
letter = classification['letter']
|
| 402 |
+
summary['detected_letters'][letter] = summary['detected_letters'].get(letter, 0) + 1
|
| 403 |
+
|
| 404 |
+
if classification.get('word'):
|
| 405 |
+
word = classification['word']
|
| 406 |
+
summary['detected_words'][word] = summary['detected_words'].get(word, 0) + 1
|
| 407 |
+
else:
|
| 408 |
+
summary['failed_files'] += 1
|
| 409 |
+
summary['processing_errors'].append({
|
| 410 |
+
'filename': result.get('filename', 'unknown'),
|
| 411 |
+
'error': result.get('error', 'unknown error')
|
| 412 |
+
})
|
| 413 |
+
|
| 414 |
+
# Calculate average confidence
|
| 415 |
+
if confidences:
|
| 416 |
+
summary['average_confidence'] = sum(confidences) / len(confidences)
|
| 417 |
+
|
| 418 |
+
return summary
|
src/src/fallback_classifier.py
ADDED
|
@@ -0,0 +1,303 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Fallback Sign Language Classifier
|
| 3 |
+
|
| 4 |
+
This module provides basic sign language classification without requiring OpenAI API.
|
| 5 |
+
It uses rule-based pattern matching to identify common ASL letters and gestures.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from typing import Dict, Any, Optional
|
| 9 |
+
import re
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class FallbackSignLanguageClassifier:
    """
    Fallback classifier for basic ASL recognition using pattern matching.

    Used when no AI API (Gemini/OpenAI) is available: it parses the
    textual gesture description produced upstream and maps finger
    configurations to ASL letters, numbers, and common words via fixed
    rules.
    """

    # Finger names in thumb-to-pinky order; also fixes the order of the
    # lists returned by the extraction helpers.
    _FINGER_NAMES = ('thumb', 'index', 'middle', 'ring', 'pinky')

    # (phrase to search for, pattern tag) pairs recognized by _extract_patterns.
    _PATTERN_PHRASES = (
        ('closed fist', 'closed_fist'),
        ('open hand', 'open_hand'),
        ('pointing gesture', 'pointing'),
        ('pinch gesture', 'pinch'),
    )

    def __init__(self):
        """Initialize the fallback classifier."""
        self.debug = True  # when True, classification steps are printed to stdout
        print("Fallback classifier initialized (no API required)")

    def classify_gesture(self, gesture_description: str,
                         sign_language: str = "ASL",
                         context: Optional[str] = None) -> Dict[str, Any]:
        """
        Classify a gesture using rule-based pattern matching.

        Args:
            gesture_description: Description of the hand gesture
            sign_language: Sign language type (default: ASL; only ASL rules
                are implemented)
            context: Additional context (optional; unused by the rules)

        Returns:
            Classification result dictionary with 'success', 'letter',
            'word', 'confidence', 'description', and 'method' keys
        """
        if self.debug:
            print(f"\n=== Fallback Classification Debug ===")
            print(f"Input: {gesture_description}")

        try:
            # Analyze the gesture description
            result = self._analyze_gesture_patterns(gesture_description)
            result['success'] = True
            result['method'] = 'fallback_pattern_matching'

            if self.debug:
                print(f"Result: {result}")
                print("=== End Fallback Debug ===\n")

            return result

        except Exception as e:
            if self.debug:
                print(f"Fallback classification error: {str(e)}")
                print("=== End Fallback Debug ===\n")

            return {
                'success': False,
                'error': str(e),
                'letter': None,
                'word': None,
                'confidence': 0.0,
                'description': 'Fallback classification failed',
                'method': 'fallback_pattern_matching'
            }

    def _analyze_gesture_patterns(self, description: str) -> Dict[str, Any]:
        """
        Analyze a gesture description using the pattern-matching rules.

        Args:
            description: Gesture description string

        Returns:
            Classification result (without 'success'/'method', which
            classify_gesture adds)
        """
        desc_lower = description.lower()

        # Extract key information from the structured description text.
        extended_fingers = self._extract_extended_fingers(desc_lower)
        closed_fingers = self._extract_closed_fingers(desc_lower)
        patterns = self._extract_patterns(desc_lower)

        # Rule-based classification
        letter, word, confidence, explanation = self._apply_classification_rules(
            extended_fingers, closed_fingers, patterns, desc_lower
        )

        return {
            'letter': letter,
            'word': word,
            'confidence': confidence,
            'description': explanation,
            'extended_fingers': extended_fingers,
            'closed_fingers': closed_fingers,
            'patterns': patterns
        }

    def _extract_fingers(self, description: str, section: str) -> list:
        """
        Extract finger names listed after a section label.

        Shared implementation for the extended/closed extractors (the
        originals were copy-paste duplicates).  The section's text runs
        until the next ';' or, failing that, the end of the description.

        Args:
            description: Lower-cased gesture description
            section: Section label to look for, e.g. 'extended fingers:'

        Returns:
            Finger names found in that section, in thumb-to-pinky order
        """
        if section not in description:
            return []

        start = description.find(section) + len(section)
        end = description.find(';', start)
        if end == -1:
            end = len(description)
        fingers_text = description[start:end].strip()

        return [finger for finger in self._FINGER_NAMES if finger in fingers_text]

    def _extract_extended_fingers(self, description: str) -> list:
        """Extract extended fingers from description."""
        return self._extract_fingers(description, 'extended fingers:')

    def _extract_closed_fingers(self, description: str) -> list:
        """Extract closed fingers from description."""
        return self._extract_fingers(description, 'closed fingers:')

    def _extract_patterns(self, description: str) -> list:
        """Extract high-level gesture pattern tags from description."""
        return [tag for phrase, tag in self._PATTERN_PHRASES if phrase in description]

    def _apply_classification_rules(self, extended: list, closed: list,
                                    patterns: list, description: str) -> tuple:
        """
        Apply the ASL-specific classification rules, most specific first.

        Args:
            extended: Names of extended fingers
            closed: Names of closed fingers
            patterns: Pattern tags from _extract_patterns
            description: Full lower-cased description (unused by current rules)

        Returns:
            (letter, word, confidence, explanation); letter/word may each
            be None
        """
        # Rule 1: Single finger extended
        if len(extended) == 1:
            if 'index' in extended:
                return '1', None, 0.9, "Index finger only = Number 1"
            elif 'pinky' in extended:
                return None, 'I', 0.9, "Pinky finger only = Pronoun I"
            elif 'thumb' in extended:
                return None, 'GOOD', 0.8, "Thumb up = GOOD"
            elif 'middle' in extended:
                return None, 'BAD', 0.6, "Middle finger = BAD (rude gesture)"

        # Rule 2: Two fingers extended
        if len(extended) == 2:
            if 'index' in extended and 'middle' in extended:
                return '2', None, 0.9, "Index and middle = Number 2"
            elif 'index' in extended and 'thumb' in extended:
                return 'L', None, 0.8, "Index and thumb = Letter L"
            elif 'index' in extended and 'pinky' in extended:
                return None, 'I LOVE YOU', 0.9, "Index and pinky = I LOVE YOU sign"
            elif 'thumb' in extended and 'pinky' in extended:
                return None, 'CALL', 0.7, "Thumb and pinky = CALL/PHONE"

        # Rule 3: Three fingers extended
        if len(extended) == 3:
            if 'index' in extended and 'middle' in extended and 'ring' in extended:
                return '3', None, 0.9, "Three middle fingers = Number 3"
            elif 'thumb' in extended and 'index' in extended and 'pinky' in extended:
                return None, 'I LOVE YOU', 0.9, "Thumb, index, pinky = I LOVE YOU"

        # Rule 4: Four fingers extended (thumb closed)
        if len(extended) == 4 and 'thumb' in closed:
            return '4', None, 0.9, "Four fingers, thumb closed = Number 4"

        # Rule 5: All five fingers extended
        if len(extended) == 5:
            return '5', None, 0.9, "All fingers extended = Number 5"

        # Rule 6: Closed fist (no fingers extended)
        if len(extended) == 0 or 'closed_fist' in patterns:
            return 'A', None, 0.8, "Closed fist = Letter A"

        # Rule 7: Four fingers (no thumb) extended = HELLO.
        # NOTE(review): unreachable — Rule 4 already returns for every
        # len(extended) == 4 with thumb closed. Kept for behavior parity.
        if (len(extended) == 4 and 'index' in extended and 'middle' in extended
                and 'ring' in extended and 'pinky' in extended and 'thumb' in closed):
            return None, 'HELLO', 0.8, "Four fingers extended = HELLO"

        # Rule 8: Pinch gesture pattern
        if 'pinch' in patterns:
            return 'F', None, 0.7, "Pinch gesture = Letter F"

        # Rule 9: Pointing gesture pattern
        if 'pointing' in patterns:
            if 'index' in extended and len(extended) == 1:
                return '1', None, 0.8, "Pointing with index = Number 1"
            else:
                return None, 'YOU', 0.6, "Pointing gesture = YOU"

        # Rule 10: Open hand pattern
        if 'open_hand' in patterns:
            if len(extended) == 5:
                return '5', None, 0.8, "Open hand = Number 5"
            else:
                return None, 'HELLO', 0.7, "Open hand = HELLO"

        # Default fallback based on finger count with lower confidence.
        # NOTE(review): counts 0 and 5 are unreachable here (Rules 5/6
        # already returned). Kept for behavior parity.
        finger_count = len(extended)
        if finger_count == 0:
            return 'A', None, 0.4, f"No extended fingers, default to A"
        elif finger_count == 1:
            return '1', None, 0.4, f"One finger extended, default to 1"
        elif finger_count == 2:
            return '2', None, 0.4, f"Two fingers extended, default to 2"
        elif finger_count == 3:
            return '3', None, 0.4, f"Three fingers extended, default to 3"
        elif finger_count == 4:
            return '4', None, 0.4, f"Four fingers extended, default to 4"
        elif finger_count == 5:
            return '5', None, 0.4, f"Five fingers extended, default to 5"
        else:
            return None, None, 0.1, "Unable to classify gesture"

    def classify_sequence(self, gesture_descriptions: list,
                          sign_language: str = "ASL") -> Dict[str, Any]:
        """
        Classify a sequence of gestures (fallback implementation).

        Each gesture is classified independently; detected letters are
        joined and checked against a small common-word table when no word
        was detected directly.

        Args:
            gesture_descriptions: List of gesture descriptions
            sign_language: Sign language type

        Returns:
            Sequence classification result with 'word', 'sentence',
            'individual_letters', 'confidence', and 'method' keys
        """
        letters = []
        words = []

        for desc in gesture_descriptions:
            result = self.classify_gesture(desc, sign_language)
            if result.get('success'):
                if result.get('letter'):
                    letters.append(result['letter'])
                if result.get('word'):
                    words.append(result['word'])

        # Try to form words from letters
        if letters and not words:
            letter_sequence = ''.join(letters)
            # Check for common words
            common_words = {
                'HI': 'HI',
                'NO': 'NO',
                'OK': 'OK',
                'YES': 'YES'
            }

            if letter_sequence in common_words:
                words.append(common_words[letter_sequence])

        return {
            'success': True,
            'word': words[0] if words else None,
            'sentence': ' '.join(words) if len(words) > 1 else None,
            'confidence': 0.6,
            'individual_letters': letters,
            'method': 'fallback_sequence_matching'
        }
|
src/src/file_handler.py
ADDED
|
@@ -0,0 +1,543 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
File Input Handler for Sign Language Detection
|
| 3 |
+
Processes video and image files for gesture analysis
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import cv2
|
| 7 |
+
import numpy as np
|
| 8 |
+
import os
|
| 9 |
+
from typing import List, Dict, Any, Optional, Tuple, Generator
|
| 10 |
+
from PIL import Image
|
| 11 |
+
import time
|
| 12 |
+
|
| 13 |
+
from .hand_detector import HandDetector
|
| 14 |
+
from .gesture_extractor import GestureExtractor
|
| 15 |
+
from .openai_classifier import SignLanguageClassifier
|
| 16 |
+
from .gemini_classifier import GeminiSignLanguageClassifier
|
| 17 |
+
from .prediction_logger import PredictionLogger
|
| 18 |
+
from .visualization_utils import HandLandmarkVisualizer, create_comparison_view
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class FileHandler:
|
| 22 |
+
"""
|
| 23 |
+
Handles file input (images and videos) for sign language detection.
|
| 24 |
+
"""
|
| 25 |
+
|
| 26 |
+
    def __init__(self,
                 frame_skip: int = 5,
                 max_frames: int = 100):
        """
        Initialize the FileHandler and its processing pipeline.

        The AI classifier is NOT created here; it is attached later via
        initialize_classifier().

        Args:
            frame_skip: Number of frames to skip between processing (for videos)
            max_frames: Maximum number of frames to process from a video
        """
        self.frame_skip = frame_skip
        self.max_frames = max_frames

        # Initialize components
        self.hand_detector = HandDetector(static_image_mode=True)
        self.gesture_extractor = GestureExtractor()
        # Optional AI classifier; stays None until initialize_classifier() succeeds.
        self.classifier = None
        self.visualizer = HandLandmarkVisualizer()
        self.logger = PredictionLogger(debug=True)

        # Supported file formats (lower-case extensions, with leading dot)
        self.supported_image_formats = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp'}
        self.supported_video_formats = {'.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv'}
|
| 49 |
+
|
| 50 |
+
def initialize_classifier(self, api_key: Optional[str] = None, use_gemini: bool = True) -> bool:
|
| 51 |
+
"""
|
| 52 |
+
Initialize the AI classifier (Gemini or OpenAI).
|
| 53 |
+
|
| 54 |
+
Args:
|
| 55 |
+
api_key: API key (Gemini or OpenAI)
|
| 56 |
+
use_gemini: Whether to use Gemini instead of OpenAI (default: True)
|
| 57 |
+
|
| 58 |
+
Returns:
|
| 59 |
+
True if classifier initialized successfully, False otherwise
|
| 60 |
+
"""
|
| 61 |
+
# Check environment variable for preference
|
| 62 |
+
use_gemini_env = os.getenv('USE_GEMINI', 'True').lower() == 'true'
|
| 63 |
+
use_gemini = use_gemini and use_gemini_env
|
| 64 |
+
|
| 65 |
+
if use_gemini:
|
| 66 |
+
try:
|
| 67 |
+
self.classifier = GeminiSignLanguageClassifier(api_key=api_key)
|
| 68 |
+
print("✅ Gemini AI classifier initialized for file processing")
|
| 69 |
+
return True
|
| 70 |
+
except Exception as e:
|
| 71 |
+
print(f"⚠️ Failed to initialize Gemini classifier: {e}")
|
| 72 |
+
print("🔄 Falling back to OpenAI classifier...")
|
| 73 |
+
|
| 74 |
+
# Fallback to OpenAI
|
| 75 |
+
try:
|
| 76 |
+
self.classifier = SignLanguageClassifier(api_key=api_key)
|
| 77 |
+
print("✅ OpenAI classifier initialized as fallback")
|
| 78 |
+
return True
|
| 79 |
+
except Exception as e2:
|
| 80 |
+
print(f"❌ OpenAI classifier also failed: {e2}")
|
| 81 |
+
print("🔧 Will use pattern-based fallback only")
|
| 82 |
+
return False
|
| 83 |
+
else:
|
| 84 |
+
try:
|
| 85 |
+
self.classifier = SignLanguageClassifier(api_key=api_key)
|
| 86 |
+
print("✅ OpenAI classifier initialized for file processing")
|
| 87 |
+
return True
|
| 88 |
+
except Exception as e:
|
| 89 |
+
print(f"❌ Failed to initialize OpenAI classifier: {e}")
|
| 90 |
+
print("🔧 Will use pattern-based fallback only")
|
| 91 |
+
return False
|
| 92 |
+
|
| 93 |
+
def is_supported_file(self, file_path: str) -> bool:
|
| 94 |
+
"""
|
| 95 |
+
Check if the file format is supported.
|
| 96 |
+
|
| 97 |
+
Args:
|
| 98 |
+
file_path: Path to the file
|
| 99 |
+
|
| 100 |
+
Returns:
|
| 101 |
+
True if file format is supported, False otherwise
|
| 102 |
+
"""
|
| 103 |
+
if not os.path.exists(file_path):
|
| 104 |
+
return False
|
| 105 |
+
|
| 106 |
+
file_ext = os.path.splitext(file_path)[1].lower()
|
| 107 |
+
return file_ext in self.supported_image_formats or file_ext in self.supported_video_formats
|
| 108 |
+
|
| 109 |
+
def get_file_type(self, file_path: str) -> str:
|
| 110 |
+
"""
|
| 111 |
+
Determine if file is image or video.
|
| 112 |
+
|
| 113 |
+
Args:
|
| 114 |
+
file_path: Path to the file
|
| 115 |
+
|
| 116 |
+
Returns:
|
| 117 |
+
'image', 'video', or 'unknown'
|
| 118 |
+
"""
|
| 119 |
+
file_ext = os.path.splitext(file_path)[1].lower()
|
| 120 |
+
|
| 121 |
+
if file_ext in self.supported_image_formats:
|
| 122 |
+
return 'image'
|
| 123 |
+
elif file_ext in self.supported_video_formats:
|
| 124 |
+
return 'video'
|
| 125 |
+
else:
|
| 126 |
+
return 'unknown'
|
| 127 |
+
|
| 128 |
+
    def process_image(self, image_path: str) -> Dict[str, Any]:
        """
        Process a single image file for gesture detection.

        Loads the image, detects hands, builds annotated/enhanced/comparison
        visualizations, and (when a classifier is attached) classifies each
        detected hand's gesture.

        Args:
            image_path: Path to the image file

        Returns:
            Dictionary containing processing results; on failure a dict with
            'success': False and an 'error' message
        """
        if not os.path.exists(image_path):
            return {'success': False, 'error': 'File not found'}

        try:
            # Load image (cv2.imread returns None instead of raising on bad files)
            image = cv2.imread(image_path)
            if image is None:
                return {'success': False, 'error': 'Could not load image'}

            # Detect hands
            annotated_image, hand_landmarks = self.hand_detector.detect_hands(image)

            print(f"\n=== Hand Detection Debug ===")
            print(f"Processing image: {os.path.basename(image_path)}")
            print(f"Image shape: {image.shape}")
            print(f"Hands detected: {len(hand_landmarks) if hand_landmarks else 0}")
            if hand_landmarks:
                for i, hand in enumerate(hand_landmarks):
                    print(f"Hand {i+1}: {hand['label']}, confidence: {hand['confidence']:.3f}")
            print("=== End Hand Detection Debug ===\n")

            # Create enhanced visualization (fall back to the basic annotated
            # image when no hands were found)
            enhanced_image = self.visualizer.draw_enhanced_landmarks(image, hand_landmarks) if hand_landmarks else annotated_image

            # Create comparison view (original side-by-side with enhanced)
            comparison_image = create_comparison_view(image, enhanced_image)

            # Process gestures: one detection entry per detected hand
            detections = []
            if hand_landmarks:
                for hand_data in hand_landmarks:
                    gesture_description = self.gesture_extractor.create_gesture_description(hand_data)

                    detection = {
                        'hand_label': hand_data['label'],
                        'gesture_description': gesture_description,
                        'confidence': hand_data['confidence'],
                        # Bounding box in pixel coordinates (width, height from image shape)
                        'bounding_box': self.hand_detector.get_bounding_box(
                            hand_data, image.shape[1], image.shape[0]
                        ),
                        'landmarks_3d': hand_data['landmarks']  # Store for 3D visualization
                    }

                    # Classify gesture if classifier available
                    if self.classifier:
                        print(f"\n=== File Handler Debug ===")
                        print(f"Processing hand: {hand_data['label']}")
                        print(f"Gesture description: {gesture_description}")

                        classification = self.classifier.classify_gesture(gesture_description)
                        detection['classification'] = classification

                        print(f"Classification result: {classification}")
                        print("=== End File Handler Debug ===\n")

                    detections.append(detection)

            return {
                'success': True,
                'file_path': image_path,
                'file_type': 'image',
                'image_shape': image.shape,
                'hands_detected': len(hand_landmarks) if hand_landmarks else 0,
                'detections': detections,
                'annotated_image': annotated_image,
                'enhanced_image': enhanced_image,
                'comparison_image': comparison_image,
                'original_image': image
            }

        except Exception as e:
            # Any unexpected failure is reported, not raised, so batch
            # processing of multiple files can continue.
            return {'success': False, 'error': str(e)}
|
| 210 |
+
|
| 211 |
+
    def process_video(self, video_path: str,
                      progress_callback: Optional[callable] = None) -> Dict[str, Any]:
        """
        Process a video file for gesture detection.

        Samples frames (one every frame_skip+1, up to max_frames), detects
        hands on each sampled frame, and — when a classifier is attached —
        runs a sequence-level analysis over the per-frame detections.

        Args:
            video_path: Path to the video file
            progress_callback: Optional callable taking a float progress
                value, invoked after each sampled frame

        Returns:
            Dictionary containing processing results; on failure a dict with
            'success': False and an 'error' message
        """
        if not os.path.exists(video_path):
            return {'success': False, 'error': 'File not found'}

        try:
            # Open video
            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                return {'success': False, 'error': 'Could not open video file'}

            # Get video properties
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            fps = cap.get(cv2.CAP_PROP_FPS)
            # Guard against videos reporting 0 fps metadata
            duration = total_frames / fps if fps > 0 else 0

            # Process frames
            frame_detections = []
            frame_count = 0       # index into the full video
            processed_frames = 0  # number of frames actually analyzed

            while cap.isOpened() and processed_frames < self.max_frames:
                ret, frame = cap.read()
                if not ret:
                    break

                # Skip frames based on frame_skip setting
                if frame_count % (self.frame_skip + 1) != 0:
                    frame_count += 1
                    continue

                # Process frame (timestamp falls back to the frame index
                # when fps is unavailable)
                timestamp = frame_count / fps if fps > 0 else frame_count
                frame_result = self._process_video_frame(frame, timestamp, frame_count)

                # Only keep frames where at least one hand was found
                if frame_result['hands_detected'] > 0:
                    frame_detections.append(frame_result)

                processed_frames += 1
                frame_count += 1

                # Progress callback
                if progress_callback:
                    # Report against whichever limit (max_frames or end of
                    # video) will be reached first
                    progress = min(processed_frames / self.max_frames, frame_count / total_frames)
                    progress_callback(progress)

            cap.release()

            # Analyze sequence if detections found
            sequence_analysis = None
            if frame_detections and self.classifier:
                sequence_analysis = self._analyze_video_sequence(frame_detections)

            return {
                'success': True,
                'file_path': video_path,
                'file_type': 'video',
                'video_properties': {
                    'total_frames': total_frames,
                    'fps': fps,
                    'duration': duration,
                    'processed_frames': processed_frames
                },
                'frame_detections': frame_detections,
                'sequence_analysis': sequence_analysis,
                'total_hands_detected': sum(f['hands_detected'] for f in frame_detections)
            }

        except Exception as e:
            # Report failures instead of raising so batch runs can continue.
            return {'success': False, 'error': str(e)}
|
| 291 |
+
|
| 292 |
+
def _process_video_frame(self, frame: np.ndarray,
|
| 293 |
+
timestamp: float, frame_number: int) -> Dict[str, Any]:
|
| 294 |
+
"""
|
| 295 |
+
Process a single video frame.
|
| 296 |
+
|
| 297 |
+
Args:
|
| 298 |
+
frame: Video frame as numpy array
|
| 299 |
+
timestamp: Timestamp in seconds
|
| 300 |
+
frame_number: Frame number
|
| 301 |
+
|
| 302 |
+
Returns:
|
| 303 |
+
Dictionary containing frame processing results
|
| 304 |
+
"""
|
| 305 |
+
# Detect hands
|
| 306 |
+
annotated_frame, hand_landmarks = self.hand_detector.detect_hands(frame)
|
| 307 |
+
|
| 308 |
+
# Process gestures
|
| 309 |
+
detections = []
|
| 310 |
+
if hand_landmarks:
|
| 311 |
+
for hand_data in hand_landmarks:
|
| 312 |
+
gesture_description = self.gesture_extractor.create_gesture_description(hand_data)
|
| 313 |
+
|
| 314 |
+
detection = {
|
| 315 |
+
'hand_label': hand_data['label'],
|
| 316 |
+
'gesture_description': gesture_description,
|
| 317 |
+
'confidence': hand_data['confidence']
|
| 318 |
+
}
|
| 319 |
+
|
| 320 |
+
# Classify gesture if classifier available
|
| 321 |
+
if self.classifier:
|
| 322 |
+
classification = self.classifier.classify_gesture(gesture_description)
|
| 323 |
+
detection['classification'] = classification
|
| 324 |
+
|
| 325 |
+
detections.append(detection)
|
| 326 |
+
|
| 327 |
+
return {
|
| 328 |
+
'timestamp': timestamp,
|
| 329 |
+
'frame_number': frame_number,
|
| 330 |
+
'hands_detected': len(hand_landmarks) if hand_landmarks else 0,
|
| 331 |
+
'detections': detections
|
| 332 |
+
}
|
| 333 |
+
|
| 334 |
+
def _analyze_video_sequence(self, frame_detections: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Analyze a sequence of per-frame detections as one gesture sequence.

    Args:
        frame_detections: Per-frame detection dicts produced by
            `_process_video_frame` (must contain 'timestamp' and 'detections').

    Returns:
        The classifier's sequence result annotated with timing info, or a
        dict with an 'error' key when analysis is not possible.
    """
    if not self.classifier:
        return {'error': 'Classifier not initialized'}

    try:
        # Keep only descriptions whose per-frame classification succeeded.
        descriptions = [
            det['gesture_description']
            for frame_data in frame_detections
            for det in frame_data['detections']
            if det.get('classification', {}).get('success', False)
        ]

        if len(descriptions) < 2:
            return {'error': 'Not enough gestures for sequence analysis'}

        analysis = self.classifier.classify_sequence(descriptions)

        # Attach timing derived from the first and last frames of the clip.
        first_ts = frame_detections[0]['timestamp']
        last_ts = frame_detections[-1]['timestamp']
        analysis['start_time'] = first_ts
        analysis['end_time'] = last_ts
        analysis['duration'] = last_ts - first_ts
        analysis['gesture_count'] = len(descriptions)

        return analysis

    except Exception as e:
        return {'error': str(e)}
|
| 371 |
+
|
| 372 |
+
def create_thumbnail(self, file_path: str, size: Tuple[int, int] = (150, 150)) -> Optional[np.ndarray]:
    """
    Create a thumbnail for the given image or video file.

    For videos the first frame is used as the thumbnail source.

    Args:
        file_path: Path to the file.
        size: Thumbnail size (width, height).

    Returns:
        Thumbnail image (BGR) or None if the file could not be read.
    """
    try:
        file_type = self.get_file_type(file_path)

        if file_type == 'image':
            image = cv2.imread(file_path)
            if image is not None:
                return cv2.resize(image, size)

        elif file_type == 'video':
            cap = cv2.VideoCapture(file_path)
            try:
                if cap.isOpened():
                    ret, frame = cap.read()
                    if ret:
                        return cv2.resize(frame, size)
            finally:
                # Fix: release the capture unconditionally. Previously an
                # exception raised by read()/resize() skipped release() and
                # leaked the capture handle.
                cap.release()

    except Exception as e:
        print(f"Error creating thumbnail for {file_path}: {e}")

    return None
|
| 406 |
+
|
| 407 |
+
def get_file_metadata(self, file_path: str) -> Dict[str, Any]:
    """
    Get metadata for a file.

    Always returns the basic fields (path, name, size, type, supported);
    image/video dimensions are added when the file can be opened. Read
    errors are reported under an 'error' key instead of raising.

    Args:
        file_path: Path to the file.

    Returns:
        Dictionary containing file metadata.
    """
    metadata = {
        'file_path': file_path,
        'filename': os.path.basename(file_path),
        'file_size': os.path.getsize(file_path) if os.path.exists(file_path) else 0,
        'file_type': self.get_file_type(file_path),
        'supported': self.is_supported_file(file_path)
    }

    try:
        file_type = metadata['file_type']

        if file_type == 'image':
            image = cv2.imread(file_path)
            if image is not None:
                metadata.update({
                    'width': image.shape[1],
                    'height': image.shape[0],
                    'channels': image.shape[2] if len(image.shape) > 2 else 1
                })

        elif file_type == 'video':
            cap = cv2.VideoCapture(file_path)
            try:
                if cap.isOpened():
                    # Query each property once instead of re-reading it.
                    fps = cap.get(cv2.CAP_PROP_FPS)
                    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                    metadata.update({
                        'width': int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                        'height': int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
                        'fps': fps,
                        'frame_count': frame_count,
                        'duration': frame_count / fps if fps > 0 else 0
                    })
            finally:
                # Fix: release the capture unconditionally. Previously an
                # exception during the property reads skipped release() and
                # leaked the capture handle.
                cap.release()

    except Exception as e:
        metadata['error'] = str(e)

    return metadata
|
| 453 |
+
|
| 454 |
+
def batch_process_files(self, file_paths: List[str],
                        progress_callback: Optional[callable] = None,
                        detailed_progress: Optional[callable] = None) -> List[Dict[str, Any]]:
    """
    Process a batch of image/video files with progress tracking.

    Args:
        file_paths: List of file paths to process.
        progress_callback: Optional callback receiving overall progress (0..1).
        detailed_progress: Optional callback receiving (message, index, total).

    Returns:
        One result dict per input file, in order.
    """
    outcomes = []
    n_total = len(file_paths)

    for index, path in enumerate(file_paths):
        if detailed_progress:
            detailed_progress(f"Processing {os.path.basename(path)}...", index, n_total)

        # Reject unsupported formats early; note this also skips the
        # overall-progress update, matching prior behavior.
        if not self.is_supported_file(path):
            outcomes.append({
                'success': False,
                'file_path': path,
                'filename': os.path.basename(path),
                'error': 'Unsupported file format'
            })
            continue

        try:
            kind = self.get_file_type(path)

            if kind == 'image':
                outcome = self.process_image(path)
            elif kind == 'video':
                # Suppress nested per-frame progress during batch runs.
                outcome = self.process_video(path, progress_callback=None)
            else:
                outcome = {
                    'success': False,
                    'file_path': path,
                    'filename': os.path.basename(path),
                    'error': 'Unknown file type'
                }

            # Enrich successful results with file metadata.
            if outcome.get('success'):
                outcome.update(self.get_file_metadata(path))

            outcomes.append(outcome)

        except Exception as exc:
            outcomes.append({
                'success': False,
                'file_path': path,
                'filename': os.path.basename(path),
                'error': str(exc)
            })

        if progress_callback:
            progress_callback((index + 1) / n_total)

    return outcomes
|
| 520 |
+
|
| 521 |
+
def save_annotated_image(self, annotated_image: np.ndarray,
                         output_path: str) -> bool:
    """
    Save annotated image to file.

    Args:
        annotated_image: Annotated image array (BGR).
        output_path: Path to save the image; the extension selects the codec.

    Returns:
        True if the file was actually written, False otherwise.
    """
    try:
        # Fix: cv2.imwrite signals failure (bad extension, unwritable path)
        # via a False return value rather than by raising, so the previous
        # unconditional `return True` reported success for failed writes.
        written = cv2.imwrite(output_path, annotated_image)
        if not written:
            print(f"Error saving annotated image: cv2.imwrite failed for {output_path}")
        return bool(written)
    except Exception as e:
        print(f"Error saving annotated image: {e}")
        return False
|
| 539 |
+
|
| 540 |
+
def cleanup(self):
    """Release resources held by the hand detector, if one is present."""
    detector = self.hand_detector
    if detector:
        detector.cleanup()
|
src/src/gemini_classifier.py
ADDED
|
@@ -0,0 +1,420 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Google Gemini Sign Language Classifier
|
| 3 |
+
|
| 4 |
+
This module provides sign language classification using Google's Gemini AI API.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import google.generativeai as genai
|
| 8 |
+
import os
|
| 9 |
+
from typing import List, Dict, Any, Optional
|
| 10 |
+
import json
|
| 11 |
+
import time
|
| 12 |
+
from dotenv import load_dotenv
|
| 13 |
+
from .fallback_classifier import FallbackSignLanguageClassifier
|
| 14 |
+
|
| 15 |
+
# Load environment variables
|
| 16 |
+
load_dotenv()
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class GeminiSignLanguageClassifier:
|
| 20 |
+
"""
|
| 21 |
+
Sign language classifier using Google Gemini AI.
|
| 22 |
+
"""
|
| 23 |
+
|
| 24 |
+
def __init__(self, api_key: Optional[str] = None, model: str = "gemini-1.5-flash"):
|
| 25 |
+
"""
|
| 26 |
+
Initialize the Gemini classifier.
|
| 27 |
+
|
| 28 |
+
Args:
|
| 29 |
+
api_key: Gemini API key (if None, will use environment variable)
|
| 30 |
+
model: Gemini model to use for classification
|
| 31 |
+
"""
|
| 32 |
+
self.api_key = api_key or os.getenv('GEMINI_API_KEY')
|
| 33 |
+
self.model_name = model
|
| 34 |
+
|
| 35 |
+
if not self.api_key:
|
| 36 |
+
raise ValueError("Gemini API key not provided. Set GEMINI_API_KEY environment variable or pass api_key parameter.")
|
| 37 |
+
|
| 38 |
+
# Configure Gemini
|
| 39 |
+
genai.configure(api_key=self.api_key)
|
| 40 |
+
self.model = genai.GenerativeModel(self.model_name)
|
| 41 |
+
|
| 42 |
+
# Enhanced rate limiting for free tier
|
| 43 |
+
self.last_request_time = 0
|
| 44 |
+
self.min_request_interval = 5.0 # 5 seconds between requests for free tier
|
| 45 |
+
self.request_count = 0
|
| 46 |
+
self.request_window_start = time.time()
|
| 47 |
+
self.max_requests_per_minute = 10 # Conservative limit for free tier
|
| 48 |
+
|
| 49 |
+
# Initialize fallback classifier
|
| 50 |
+
self.fallback_classifier = FallbackSignLanguageClassifier()
|
| 51 |
+
|
| 52 |
+
# Debug mode
|
| 53 |
+
self.debug = True
|
| 54 |
+
|
| 55 |
+
print(f"Gemini classifier initialized with fallback support")
|
| 56 |
+
|
| 57 |
+
def classify_gesture(self, gesture_description: str,
|
| 58 |
+
sign_language: str = "ASL",
|
| 59 |
+
context: Optional[str] = None) -> Dict[str, Any]:
|
| 60 |
+
"""
|
| 61 |
+
Classify a single gesture using Gemini AI.
|
| 62 |
+
|
| 63 |
+
Args:
|
| 64 |
+
gesture_description: Description of the hand gesture
|
| 65 |
+
sign_language: Sign language type (default: ASL)
|
| 66 |
+
context: Additional context (optional)
|
| 67 |
+
|
| 68 |
+
Returns:
|
| 69 |
+
Classification result dictionary
|
| 70 |
+
"""
|
| 71 |
+
self._rate_limit()
|
| 72 |
+
|
| 73 |
+
# Create the prompt
|
| 74 |
+
prompt = self._create_classification_prompt(gesture_description, sign_language, context)
|
| 75 |
+
|
| 76 |
+
if self.debug:
|
| 77 |
+
print(f"\n=== Gemini Classification Debug ===")
|
| 78 |
+
print(f"Input gesture description: {gesture_description}")
|
| 79 |
+
print(f"Prompt sent to Gemini: {prompt[:200]}...")
|
| 80 |
+
|
| 81 |
+
try:
|
| 82 |
+
response = self.model.generate_content(prompt)
|
| 83 |
+
response_content = response.text
|
| 84 |
+
|
| 85 |
+
if self.debug:
|
| 86 |
+
print(f"Gemini response: {response_content}")
|
| 87 |
+
|
| 88 |
+
result = self._parse_response(response_content)
|
| 89 |
+
result['raw_response'] = response_content
|
| 90 |
+
result['success'] = True
|
| 91 |
+
result['method'] = 'gemini_ai'
|
| 92 |
+
|
| 93 |
+
if self.debug:
|
| 94 |
+
print(f"Parsed result: {result}")
|
| 95 |
+
print("=== End Gemini Debug ===\n")
|
| 96 |
+
|
| 97 |
+
return result
|
| 98 |
+
|
| 99 |
+
except Exception as e:
|
| 100 |
+
error_msg = str(e)
|
| 101 |
+
if self.debug:
|
| 102 |
+
print(f"Gemini API Error: {error_msg}")
|
| 103 |
+
print("Falling back to pattern-based classification...")
|
| 104 |
+
|
| 105 |
+
# Use fallback classifier when Gemini API fails
|
| 106 |
+
try:
|
| 107 |
+
fallback_result = self.fallback_classifier.classify_gesture(
|
| 108 |
+
gesture_description, sign_language, context
|
| 109 |
+
)
|
| 110 |
+
fallback_result['fallback_used'] = True
|
| 111 |
+
fallback_result['gemini_error'] = error_msg
|
| 112 |
+
|
| 113 |
+
if self.debug:
|
| 114 |
+
print(f"Fallback result: {fallback_result}")
|
| 115 |
+
print("=== End Gemini Debug ===\n")
|
| 116 |
+
|
| 117 |
+
return fallback_result
|
| 118 |
+
|
| 119 |
+
except Exception as fallback_error:
|
| 120 |
+
if self.debug:
|
| 121 |
+
print(f"Fallback also failed: {str(fallback_error)}")
|
| 122 |
+
print("=== End Gemini Debug ===\n")
|
| 123 |
+
|
| 124 |
+
return {
|
| 125 |
+
'success': False,
|
| 126 |
+
'error': error_msg,
|
| 127 |
+
'fallback_error': str(fallback_error),
|
| 128 |
+
'letter': None,
|
| 129 |
+
'word': None,
|
| 130 |
+
'confidence': 0.0,
|
| 131 |
+
'description': None,
|
| 132 |
+
'method': 'gemini_ai'
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
def classify_sequence(self, gesture_descriptions: List[str],
|
| 136 |
+
sign_language: str = "ASL") -> Dict[str, Any]:
|
| 137 |
+
"""
|
| 138 |
+
Classify a sequence of gestures using Gemini AI.
|
| 139 |
+
|
| 140 |
+
Args:
|
| 141 |
+
gesture_descriptions: List of gesture descriptions
|
| 142 |
+
sign_language: Sign language type
|
| 143 |
+
|
| 144 |
+
Returns:
|
| 145 |
+
Sequence classification result
|
| 146 |
+
"""
|
| 147 |
+
self._rate_limit()
|
| 148 |
+
|
| 149 |
+
# Create sequence prompt
|
| 150 |
+
prompt = self._create_sequence_prompt(gesture_descriptions, sign_language)
|
| 151 |
+
|
| 152 |
+
try:
|
| 153 |
+
response = self.model.generate_content(prompt)
|
| 154 |
+
response_content = response.text
|
| 155 |
+
|
| 156 |
+
result = self._parse_sequence_response(response_content)
|
| 157 |
+
result['raw_response'] = response_content
|
| 158 |
+
result['success'] = True
|
| 159 |
+
result['method'] = 'gemini_ai'
|
| 160 |
+
|
| 161 |
+
return result
|
| 162 |
+
|
| 163 |
+
except Exception as e:
|
| 164 |
+
# Use fallback for sequence classification too
|
| 165 |
+
try:
|
| 166 |
+
fallback_result = self.fallback_classifier.classify_sequence(
|
| 167 |
+
gesture_descriptions, sign_language
|
| 168 |
+
)
|
| 169 |
+
fallback_result['fallback_used'] = True
|
| 170 |
+
fallback_result['gemini_error'] = str(e)
|
| 171 |
+
return fallback_result
|
| 172 |
+
|
| 173 |
+
except Exception as fallback_error:
|
| 174 |
+
return {
|
| 175 |
+
'success': False,
|
| 176 |
+
'error': str(e),
|
| 177 |
+
'fallback_error': str(fallback_error),
|
| 178 |
+
'word': None,
|
| 179 |
+
'sentence': None,
|
| 180 |
+
'confidence': 0.0,
|
| 181 |
+
'method': 'gemini_ai'
|
| 182 |
+
}
|
| 183 |
+
|
| 184 |
+
def _rate_limit(self):
|
| 185 |
+
"""Enhanced rate limiting for Gemini free tier."""
|
| 186 |
+
current_time = time.time()
|
| 187 |
+
|
| 188 |
+
# Reset request count every minute
|
| 189 |
+
if current_time - self.request_window_start >= 60:
|
| 190 |
+
self.request_count = 0
|
| 191 |
+
self.request_window_start = current_time
|
| 192 |
+
|
| 193 |
+
# Check if we've hit the per-minute limit
|
| 194 |
+
if self.request_count >= self.max_requests_per_minute:
|
| 195 |
+
sleep_time = 60 - (current_time - self.request_window_start) + 1
|
| 196 |
+
if self.debug:
|
| 197 |
+
print(f"⏳ Rate limit reached, sleeping for {sleep_time:.1f} seconds...")
|
| 198 |
+
time.sleep(sleep_time)
|
| 199 |
+
self.request_count = 0
|
| 200 |
+
self.request_window_start = time.time()
|
| 201 |
+
|
| 202 |
+
# Ensure minimum interval between requests
|
| 203 |
+
time_since_last_request = current_time - self.last_request_time
|
| 204 |
+
if time_since_last_request < self.min_request_interval:
|
| 205 |
+
sleep_time = self.min_request_interval - time_since_last_request
|
| 206 |
+
if self.debug:
|
| 207 |
+
print(f"⏳ Waiting {sleep_time:.1f} seconds between requests...")
|
| 208 |
+
time.sleep(sleep_time)
|
| 209 |
+
|
| 210 |
+
self.last_request_time = time.time()
|
| 211 |
+
self.request_count += 1
|
| 212 |
+
|
| 213 |
+
def _create_classification_prompt(self, gesture_description: str,
|
| 214 |
+
sign_language: str, context: Optional[str]) -> str:
|
| 215 |
+
"""Create enhanced prompt for single gesture classification."""
|
| 216 |
+
prompt = f"""You are an expert ASL (American Sign Language) interpreter. Analyze this hand gesture and provide ONE CLEAR PREDICTION.
|
| 217 |
+
|
| 218 |
+
GESTURE DATA:
|
| 219 |
+
{gesture_description}
|
| 220 |
+
|
| 221 |
+
COMMON ASL PATTERNS TO RECOGNIZE:
|
| 222 |
+
• Index finger pointing = Number "1"
|
| 223 |
+
• Pinky finger only = Pronoun "I"
|
| 224 |
+
• Thumb up = "GOOD" or "YES"
|
| 225 |
+
• All fingers extended = Number "5" or "HELLO"
|
| 226 |
+
• Closed fist = Letter "A" or "S"
|
| 227 |
+
• Index + middle = Number "2"
|
| 228 |
+
• Three fingers = Number "3"
|
| 229 |
+
• Four fingers = Number "4"
|
| 230 |
+
• Index + pinky = "I LOVE YOU"
|
| 231 |
+
• Thumb + index = Letter "L"
|
| 232 |
+
|
| 233 |
+
TASK: Based on the finger positions described, identify what this gesture most likely represents:
|
| 234 |
+
- A single letter (A-Z)
|
| 235 |
+
- A single number (0-9)
|
| 236 |
+
- A complete word (HELLO, GOOD, I, YOU, LOVE, etc.)
|
| 237 |
+
|
| 238 |
+
Even if not a perfect match, provide your best interpretation based on ASL knowledge.
|
| 239 |
+
|
| 240 |
+
"""
|
| 241 |
+
|
| 242 |
+
if context:
|
| 243 |
+
prompt += f"Context: {context}\n\n"
|
| 244 |
+
|
| 245 |
+
prompt += """Respond in this EXACT JSON format (choose ONE prediction):
|
| 246 |
+
{
|
| 247 |
+
"letter": "1",
|
| 248 |
+
"word": null,
|
| 249 |
+
"confidence": 0.85,
|
| 250 |
+
"description": "Index finger pointing = Number 1"
|
| 251 |
+
}
|
| 252 |
+
|
| 253 |
+
OR for a word:
|
| 254 |
+
{
|
| 255 |
+
"letter": null,
|
| 256 |
+
"word": "GOOD",
|
| 257 |
+
"confidence": 0.85,
|
| 258 |
+
"description": "Thumb up = GOOD"
|
| 259 |
+
}
|
| 260 |
+
|
| 261 |
+
IMPORTANT: Always provide either a letter OR a word, never both null. Make your best guess based on ASL knowledge."""
|
| 262 |
+
|
| 263 |
+
return prompt
|
| 264 |
+
|
| 265 |
+
def _create_sequence_prompt(self, gesture_descriptions: List[str],
|
| 266 |
+
sign_language: str) -> str:
|
| 267 |
+
"""Create prompt for gesture sequence classification."""
|
| 268 |
+
prompt = f"""Analyze this sequence of {sign_language} hand gestures:
|
| 269 |
+
|
| 270 |
+
"""
|
| 271 |
+
|
| 272 |
+
for i, description in enumerate(gesture_descriptions, 1):
|
| 273 |
+
prompt += f"Gesture {i}: {description}\n"
|
| 274 |
+
|
| 275 |
+
prompt += f"""
|
| 276 |
+
What word or sentence do these {sign_language} gestures spell out when combined?
|
| 277 |
+
Consider the sequence and flow of the gestures.
|
| 278 |
+
|
| 279 |
+
Respond in JSON format:
|
| 280 |
+
{{
|
| 281 |
+
"word": "HELLO" or null,
|
| 282 |
+
"sentence": "HELLO WORLD" or null,
|
| 283 |
+
"confidence": 0.85,
|
| 284 |
+
"individual_letters": ["H", "E", "L", "L", "O"]
|
| 285 |
+
}}"""
|
| 286 |
+
|
| 287 |
+
return prompt
|
| 288 |
+
|
| 289 |
+
def _parse_response(self, response_text: str) -> Dict[str, Any]:
|
| 290 |
+
"""Parse Gemini response for single gesture classification."""
|
| 291 |
+
try:
|
| 292 |
+
# Try to parse as JSON first
|
| 293 |
+
if '{' in response_text and '}' in response_text:
|
| 294 |
+
json_start = response_text.find('{')
|
| 295 |
+
json_end = response_text.rfind('}') + 1
|
| 296 |
+
json_str = response_text[json_start:json_end]
|
| 297 |
+
result = json.loads(json_str)
|
| 298 |
+
|
| 299 |
+
# Extract values
|
| 300 |
+
letter = result.get('letter')
|
| 301 |
+
word = result.get('word')
|
| 302 |
+
confidence = float(result.get('confidence', 0.0))
|
| 303 |
+
description = result.get('description', '')
|
| 304 |
+
|
| 305 |
+
# If both are null, try to extract from description
|
| 306 |
+
if not letter and not word:
|
| 307 |
+
if self.debug:
|
| 308 |
+
print("⚠️ Gemini returned null values, trying to extract from description...")
|
| 309 |
+
|
| 310 |
+
# Try to extract prediction from description
|
| 311 |
+
desc_lower = description.lower()
|
| 312 |
+
|
| 313 |
+
# Look for numbers
|
| 314 |
+
for num in ['1', '2', '3', '4', '5', '6', '7', '8', '9', '0']:
|
| 315 |
+
if f"number '{num}'" in desc_lower or f"number {num}" in desc_lower:
|
| 316 |
+
letter = num
|
| 317 |
+
break
|
| 318 |
+
|
| 319 |
+
# Look for letters
|
| 320 |
+
if not letter:
|
| 321 |
+
for char in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ':
|
| 322 |
+
if f"letter '{char.lower()}'" in desc_lower or f"letter {char.lower()}" in desc_lower:
|
| 323 |
+
letter = char
|
| 324 |
+
break
|
| 325 |
+
|
| 326 |
+
# Look for words
|
| 327 |
+
if not letter and not word:
|
| 328 |
+
common_words = ['good', 'hello', 'i', 'you', 'love', 'yes', 'no', 'please', 'thank you']
|
| 329 |
+
for w in common_words:
|
| 330 |
+
if w in desc_lower:
|
| 331 |
+
word = w.upper()
|
| 332 |
+
break
|
| 333 |
+
|
| 334 |
+
return {
|
| 335 |
+
'letter': letter,
|
| 336 |
+
'word': word,
|
| 337 |
+
'confidence': confidence,
|
| 338 |
+
'description': description
|
| 339 |
+
}
|
| 340 |
+
else:
|
| 341 |
+
# Fallback: simple text parsing
|
| 342 |
+
return self._parse_text_response(response_text)
|
| 343 |
+
|
| 344 |
+
except (json.JSONDecodeError, ValueError):
|
| 345 |
+
return self._parse_text_response(response_text)
|
| 346 |
+
|
| 347 |
+
def _parse_sequence_response(self, response_text: str) -> Dict[str, Any]:
|
| 348 |
+
"""Parse Gemini response for sequence classification."""
|
| 349 |
+
try:
|
| 350 |
+
if '{' in response_text and '}' in response_text:
|
| 351 |
+
json_start = response_text.find('{')
|
| 352 |
+
json_end = response_text.rfind('}') + 1
|
| 353 |
+
json_str = response_text[json_start:json_end]
|
| 354 |
+
result = json.loads(json_str)
|
| 355 |
+
|
| 356 |
+
return {
|
| 357 |
+
'word': result.get('word'),
|
| 358 |
+
'sentence': result.get('sentence'),
|
| 359 |
+
'confidence': float(result.get('confidence', 0.0)),
|
| 360 |
+
'individual_letters': result.get('individual_letters', [])
|
| 361 |
+
}
|
| 362 |
+
else:
|
| 363 |
+
return self._parse_sequence_text_response(response_text)
|
| 364 |
+
|
| 365 |
+
except (json.JSONDecodeError, ValueError):
|
| 366 |
+
return self._parse_sequence_text_response(response_text)
|
| 367 |
+
|
| 368 |
+
def _parse_text_response(self, response_text: str) -> Dict[str, Any]:
|
| 369 |
+
"""Enhanced fallback text parsing for single gesture."""
|
| 370 |
+
response_lower = response_text.lower()
|
| 371 |
+
|
| 372 |
+
# Common ASL words to look for
|
| 373 |
+
common_words = ['hello', 'hungry', 'thank you', 'please', 'sorry', 'yes', 'no',
|
| 374 |
+
'i', 'you', 'love', 'help', 'more', 'water', 'eat', 'drink',
|
| 375 |
+
'good', 'bad', 'happy', 'sad', 'stop', 'go', 'come', 'home']
|
| 376 |
+
|
| 377 |
+
# Look for words first (priority)
|
| 378 |
+
word = None
|
| 379 |
+
for w in common_words:
|
| 380 |
+
if w in response_lower:
|
| 381 |
+
word = w.upper()
|
| 382 |
+
break
|
| 383 |
+
|
| 384 |
+
# Look for letter patterns
|
| 385 |
+
letter = None
|
| 386 |
+
if not word: # Only look for letters if no word found
|
| 387 |
+
import re
|
| 388 |
+
# Look for single letters
|
| 389 |
+
letter_match = re.search(r'\b([A-Z])\b', response_text.upper())
|
| 390 |
+
if letter_match:
|
| 391 |
+
letter = letter_match.group(1)
|
| 392 |
+
|
| 393 |
+
# Look for numbers
|
| 394 |
+
number_match = re.search(r'\b([0-9])\b', response_text)
|
| 395 |
+
if number_match:
|
| 396 |
+
letter = number_match.group(1)
|
| 397 |
+
|
| 398 |
+
# Extract confidence if mentioned
|
| 399 |
+
confidence = 0.5 # Default
|
| 400 |
+
conf_match = re.search(r'(\d+(?:\.\d+)?)\s*%', response_text)
|
| 401 |
+
if conf_match:
|
| 402 |
+
confidence = float(conf_match.group(1)) / 100
|
| 403 |
+
|
| 404 |
+
return {
|
| 405 |
+
'letter': letter,
|
| 406 |
+
'word': word,
|
| 407 |
+
'confidence': confidence,
|
| 408 |
+
'description': f"Parsed from text: {response_text[:100]}..."
|
| 409 |
+
}
|
| 410 |
+
|
| 411 |
+
def _parse_sequence_text_response(self, response_text: str) -> Dict[str, Any]:
|
| 412 |
+
"""Fallback text parsing for sequence."""
|
| 413 |
+
# Simple implementation for sequence parsing
|
| 414 |
+
return {
|
| 415 |
+
'word': None,
|
| 416 |
+
'sentence': None,
|
| 417 |
+
'confidence': 0.3,
|
| 418 |
+
'individual_letters': [],
|
| 419 |
+
'description': f"Text parsing fallback: {response_text[:100]}..."
|
| 420 |
+
}
|
src/src/gesture_extractor.py
ADDED
|
@@ -0,0 +1,270 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Gesture Feature Extraction Module
|
| 3 |
+
Processes hand landmark data into simplified format for OpenAI API classification
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
import math
|
| 8 |
+
from typing import List, Dict, Any, Tuple, Optional
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class GestureExtractor:
|
| 12 |
+
"""
|
| 13 |
+
A class for extracting gesture features from hand landmarks.
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
def __init__(self):
|
| 17 |
+
"""Initialize the GestureExtractor."""
|
| 18 |
+
# Define finger tip and base indices for easier processing
|
| 19 |
+
self.finger_tips = [4, 8, 12, 16, 20] # Thumb, Index, Middle, Ring, Pinky tips
|
| 20 |
+
self.finger_bases = [2, 5, 9, 13, 17] # Finger base joints
|
| 21 |
+
self.finger_pips = [3, 6, 10, 14, 18] # PIP joints
|
| 22 |
+
|
| 23 |
+
def normalize_landmarks(self, hand_landmarks: Dict[str, Any]) -> List[Dict[str, float]]:
    """
    Normalize hand landmarks so they are wrist-relative and scale-invariant.

    Coordinates are translated so the wrist (landmark 0) is the origin and
    divided by the wrist→middle-MCP distance, a proxy for hand size.

    Args:
        hand_landmarks: Hand landmark data from MediaPipe ('landmarks' key).

    Returns:
        List of dicts with normalized 'x', 'y', 'z' per landmark.
    """
    points = hand_landmarks['landmarks']

    # Wrist (landmark 0) becomes the origin of the normalized frame.
    origin_x = points[0]['x']
    origin_y = points[0]['y']

    # Hand size: planar distance from wrist to middle-finger MCP (landmark 9).
    mcp = points[9]
    scale = math.sqrt(
        (mcp['x'] - origin_x) ** 2 +
        (mcp['y'] - origin_y) ** 2
    )
    if scale == 0:
        scale = 1.0  # degenerate hand; avoid division by zero

    return [
        {
            'x': (p['x'] - origin_x) / scale,
            'y': (p['y'] - origin_y) / scale,
            'z': p['z'] / scale,
        }
        for p in points
    ]
|
| 61 |
+
|
| 62 |
+
def extract_finger_states(self, normalized_landmarks: List[Dict[str, float]]) -> Dict[str, bool]:
|
| 63 |
+
"""
|
| 64 |
+
Determine which fingers are extended or closed.
|
| 65 |
+
|
| 66 |
+
Args:
|
| 67 |
+
normalized_landmarks: Normalized landmark coordinates
|
| 68 |
+
|
| 69 |
+
Returns:
|
| 70 |
+
Dictionary with finger states (True = extended, False = closed)
|
| 71 |
+
"""
|
| 72 |
+
finger_names = ['thumb', 'index', 'middle', 'ring', 'pinky']
|
| 73 |
+
finger_states = {}
|
| 74 |
+
|
| 75 |
+
for i, finger_name in enumerate(finger_names):
|
| 76 |
+
tip_idx = self.finger_tips[i]
|
| 77 |
+
pip_idx = self.finger_pips[i]
|
| 78 |
+
|
| 79 |
+
# For thumb, use different logic (horizontal movement)
|
| 80 |
+
if finger_name == 'thumb':
|
| 81 |
+
# Compare thumb tip with thumb IP joint
|
| 82 |
+
tip_x = normalized_landmarks[tip_idx]['x']
|
| 83 |
+
ip_x = normalized_landmarks[3]['x'] # Thumb IP joint
|
| 84 |
+
finger_states[finger_name] = abs(tip_x - ip_x) > 0.1
|
| 85 |
+
else:
|
| 86 |
+
# For other fingers, compare tip Y with PIP Y
|
| 87 |
+
tip_y = normalized_landmarks[tip_idx]['y']
|
| 88 |
+
pip_y = normalized_landmarks[pip_idx]['y']
|
| 89 |
+
finger_states[finger_name] = tip_y < pip_y # Extended if tip is above PIP
|
| 90 |
+
|
| 91 |
+
return finger_states
|
| 92 |
+
|
| 93 |
+
def calculate_angles(self, normalized_landmarks: List[Dict[str, float]]) -> Dict[str, float]:
    """
    Calculate angles between key landmarks.

    Args:
        normalized_landmarks: Normalized landmark coordinates

    Returns:
        Dictionary with 'thumb_index_angle' and 'palm_orientation', both
        in degrees (0.0 whenever a vector is degenerate).
    """
    def _angle_between(u, v):
        # Angle in degrees between two 2-D vectors; 0.0 for zero-length input.
        denom = np.linalg.norm(u) * np.linalg.norm(v)
        if denom > 0:
            cos_val = np.clip(np.dot(u, v) / denom, -1.0, 1.0)
            return math.degrees(math.acos(cos_val))
        return 0.0

    wrist = normalized_landmarks[0]
    thumb_tip = normalized_landmarks[4]
    index_tip = normalized_landmarks[8]
    middle_mcp = normalized_landmarks[9]

    # Vectors radiating from the wrist toward the key landmarks.
    thumb_vec = np.array([thumb_tip['x'] - wrist['x'], thumb_tip['y'] - wrist['y']])
    index_vec = np.array([index_tip['x'] - wrist['x'], index_tip['y'] - wrist['y']])
    palm_vec = np.array([middle_mcp['x'] - wrist['x'], middle_mcp['y'] - wrist['y']])

    return {
        # Spread between thumb and index finger.
        'thumb_index_angle': _angle_between(thumb_vec, index_vec),
        # Tilt of the palm axis relative to "straight up" (0, -1).
        'palm_orientation': _angle_between(palm_vec, np.array([0, -1])),
    }
|
| 144 |
+
|
| 145 |
+
def extract_distances(self, normalized_landmarks: List[Dict[str, float]]) -> Dict[str, float]:
    """
    Calculate 2-D distances between key landmarks.

    Args:
        normalized_landmarks: Normalized landmark coordinates

    Returns:
        Dictionary with thumb-index and index-middle tip distances plus a
        wrist-to-tip distance for each finger.
    """
    def _dist(a, b):
        # Euclidean distance in the x/y plane (z is ignored).
        return math.sqrt((a['x'] - b['x']) ** 2 + (a['y'] - b['y']) ** 2)

    wrist = normalized_landmarks[0]
    thumb_tip = normalized_landmarks[4]
    index_tip = normalized_landmarks[8]
    middle_tip = normalized_landmarks[12]

    distances = {
        'thumb_index_distance': _dist(thumb_tip, index_tip),
        'index_middle_distance': _dist(index_tip, middle_tip),
    }

    # Wrist-to-fingertip reach for every finger.
    for pos, finger in enumerate(('thumb', 'index', 'middle', 'ring', 'pinky')):
        tip = normalized_landmarks[self.finger_tips[pos]]
        distances[f'wrist_{finger}_distance'] = _dist(tip, wrist)

    return distances
|
| 183 |
+
|
| 184 |
+
def create_gesture_description(self, hand_landmarks: Dict[str, Any]) -> str:
    """
    Create a textual description of the gesture for OpenAI API.

    Combines the hand label, finger states, key angles/distances and a
    few recognizable shape patterns into one semicolon-separated string.

    Args:
        hand_landmarks: Hand landmark data from MediaPipe

    Returns:
        String description of the gesture
    """
    normalized = self.normalize_landmarks(hand_landmarks)
    states = self.extract_finger_states(normalized)
    angles = self.calculate_angles(normalized)
    distances = self.extract_distances(normalized)

    parts = [f"Hand: {hand_landmarks['label']}"]

    extended = [name for name, up in states.items() if up]
    closed = [name for name, up in states.items() if not up]
    if extended:
        parts.append(f"Extended fingers: {', '.join(extended)}")
    if closed:
        parts.append(f"Closed fingers: {', '.join(closed)}")

    # Key numeric measurements, rounded for prompt readability.
    parts.append(f"Thumb-index angle: {angles['thumb_index_angle']:.1f} degrees")
    parts.append(f"Thumb-index distance: {distances['thumb_index_distance']:.3f}")
    parts.append(f"Palm orientation: {angles['palm_orientation']:.1f} degrees")

    # Tag a handful of easily recognizable gesture shapes.
    if not any(states.values()):
        parts.append("Pattern: Closed fist")
    elif all(states.values()):
        parts.append("Pattern: Open hand")
    elif states['index'] and not any(states[f] for f in ['middle', 'ring', 'pinky']):
        parts.append("Pattern: Pointing gesture")
    elif states['thumb'] and states['index'] and distances['thumb_index_distance'] < 0.1:
        parts.append("Pattern: Pinch gesture")

    return "; ".join(parts)
|
| 230 |
+
|
| 231 |
+
def extract_features_vector(self, hand_landmarks: Dict[str, Any]) -> np.ndarray:
    """
    Extract numerical feature vector for machine learning models.

    Layout (14 features): 5 binary finger states, 2 angles scaled into
    [0, 1], then 7 distance features.

    Args:
        hand_landmarks: Hand landmark data from MediaPipe

    Returns:
        NumPy array of features
    """
    normalized = self.normalize_landmarks(hand_landmarks)
    states = self.extract_finger_states(normalized)
    angles = self.calculate_angles(normalized)
    distances = self.extract_distances(normalized)

    finger_order = ('thumb', 'index', 'middle', 'ring', 'pinky')

    # Binary finger-state features (thumb .. pinky).
    features = [1.0 if states[name] else 0.0 for name in finger_order]

    # Angle features normalized from degrees into [0, 1].
    features.append(angles['thumb_index_angle'] / 180.0)
    features.append(angles['palm_orientation'] / 180.0)

    # Distance features: two tip-to-tip plus five wrist-to-tip values.
    features.append(distances['thumb_index_distance'])
    features.append(distances['index_middle_distance'])
    for name in finger_order:
        features.append(distances[f'wrist_{name}_distance'])

    return np.array(features)
|
src/src/hand_detector.py
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Hand Landmark Detection Module using MediaPipe
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import cv2
|
| 6 |
+
import mediapipe as mp
|
| 7 |
+
import numpy as np
|
| 8 |
+
from typing import List, Tuple, Optional, Dict, Any
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class HandDetector:
    """
    A class for detecting hand landmarks using MediaPipe Hands.

    Wraps ``mp.solutions.hands`` and provides helpers for landmark
    extraction, pixel conversion, bounding boxes and a simple
    closed-hand heuristic.
    """

    def __init__(self,
                 static_image_mode: bool = False,
                 max_num_hands: int = 2,
                 min_detection_confidence: float = 0.5,  # Lowered for better detection
                 min_tracking_confidence: float = 0.3):  # Lowered for better detection
        """
        Initialize the HandDetector.

        Args:
            static_image_mode: Whether to treat input as static images
            max_num_hands: Maximum number of hands to detect
            min_detection_confidence: Minimum confidence for hand detection
            min_tracking_confidence: Minimum confidence for hand tracking
        """
        self.static_image_mode = static_image_mode
        self.max_num_hands = max_num_hands
        self.min_detection_confidence = min_detection_confidence
        self.min_tracking_confidence = min_tracking_confidence

        # Initialize MediaPipe hands pipeline and drawing utilities.
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            static_image_mode=self.static_image_mode,
            max_num_hands=self.max_num_hands,
            min_detection_confidence=self.min_detection_confidence,
            min_tracking_confidence=self.min_tracking_confidence
        )
        self.mp_drawing = mp.solutions.drawing_utils
        self.mp_drawing_styles = mp.solutions.drawing_styles

    def detect_hands(self, image: np.ndarray) -> Tuple[np.ndarray, List[Dict[str, Any]]]:
        """
        Detect hands in the given image.

        Args:
            image: Input image as numpy array (BGR format)

        Returns:
            Tuple of (annotated_image, hand_landmarks_list); each list
            entry is a dict with 'label', 'landmarks' and 'confidence'.
        """
        # MediaPipe expects RGB input; OpenCV delivers BGR.
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = self.hands.process(rgb_image)

        # Draw on a copy so the caller's frame stays untouched.
        annotated_image = image.copy()
        hand_landmarks_list = []

        if results.multi_hand_landmarks:
            for idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
                # Guard against a handedness list shorter than the landmark
                # list: the old code indexed it unconditionally and could
                # raise IndexError on a mismatch.
                has_handedness = (results.multi_handedness is not None
                                  and idx < len(results.multi_handedness))
                hand_label = (results.multi_handedness[idx].classification[0].label
                              if has_handedness else "Unknown")

                # Draw landmarks on the image
                self.mp_drawing.draw_landmarks(
                    annotated_image,
                    hand_landmarks,
                    self.mp_hands.HAND_CONNECTIONS,
                    self.mp_drawing_styles.get_default_hand_landmarks_style(),
                    self.mp_drawing_styles.get_default_hand_connections_style()
                )

                # Store normalized (0..1) landmark coordinates.
                landmarks = [{'x': lm.x, 'y': lm.y, 'z': lm.z}
                             for lm in hand_landmarks.landmark]

                hand_landmarks_list.append({
                    'label': hand_label,
                    'landmarks': landmarks,
                    'confidence': (results.multi_handedness[idx].classification[0].score
                                   if has_handedness else 0.0)
                })

        return annotated_image, hand_landmarks_list

    def get_landmark_positions(self, hand_landmarks: List[Dict[str, Any]],
                               image_width: int, image_height: int) -> List[List[Tuple[int, int]]]:
        """
        Convert normalized landmarks to pixel coordinates.

        Args:
            hand_landmarks: List of hand landmark data
            image_width: Width of the image
            image_height: Height of the image

        Returns:
            One list of (x, y) pixel coordinates per hand.
            (Return annotation corrected: this has always returned a
            list of per-hand lists, not a flat list of tuples.)
        """
        positions = []
        for hand_data in hand_landmarks:
            hand_positions = [(int(lm['x'] * image_width), int(lm['y'] * image_height))
                              for lm in hand_data['landmarks']]
            positions.append(hand_positions)
        return positions

    def get_bounding_box(self, hand_landmarks: Dict[str, Any],
                         image_width: int, image_height: int) -> Tuple[int, int, int, int]:
        """
        Get bounding box for detected hand.

        Args:
            hand_landmarks: Hand landmark data
            image_width: Width of the image
            image_height: Height of the image

        Returns:
            Tuple of (x_min, y_min, x_max, y_max) in pixels
        """
        x_coords = [lm['x'] * image_width for lm in hand_landmarks['landmarks']]
        y_coords = [lm['y'] * image_height for lm in hand_landmarks['landmarks']]
        return (int(min(x_coords)), int(min(y_coords)),
                int(max(x_coords)), int(max(y_coords)))

    def is_hand_closed(self, hand_landmarks: Dict[str, Any]) -> bool:
        """
        Simple heuristic to determine if hand is closed (fist).

        A finger counts as closed when its tip sits below its PIP/IP
        joint in image coordinates; the hand counts as closed when at
        least four fingers are.

        Args:
            hand_landmarks: Hand landmark data

        Returns:
            Boolean indicating if hand appears closed
        """
        landmarks = hand_landmarks['landmarks']

        # Tip / PIP(IP) index pairs for thumb, index, middle, ring, pinky.
        finger_tips = [4, 8, 12, 16, 20]
        finger_pips = [3, 6, 10, 14, 18]

        closed_fingers = sum(
            1 for tip, pip in zip(finger_tips, finger_pips)
            if landmarks[tip]['y'] > landmarks[pip]['y']  # tip below pip
        )

        return closed_fingers >= 4

    def cleanup(self):
        """
        Clean up MediaPipe resources.
        """
        if hasattr(self, 'hands'):
            self.hands.close()
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
# Landmark indices for reference.
# Maps MediaPipe Hands landmark names to their model indices (0-20):
# the wrist first, then four joints per finger from palm to tip
# (CMC/MCP -> PIP/IP -> DIP -> TIP).
HAND_LANDMARKS = {
    'WRIST': 0,
    'THUMB_CMC': 1, 'THUMB_MCP': 2, 'THUMB_IP': 3, 'THUMB_TIP': 4,
    'INDEX_FINGER_MCP': 5, 'INDEX_FINGER_PIP': 6, 'INDEX_FINGER_DIP': 7, 'INDEX_FINGER_TIP': 8,
    'MIDDLE_FINGER_MCP': 9, 'MIDDLE_FINGER_PIP': 10, 'MIDDLE_FINGER_DIP': 11, 'MIDDLE_FINGER_TIP': 12,
    'RING_FINGER_MCP': 13, 'RING_FINGER_PIP': 14, 'RING_FINGER_DIP': 15, 'RING_FINGER_TIP': 16,
    'PINKY_MCP': 17, 'PINKY_PIP': 18, 'PINKY_DIP': 19, 'PINKY_TIP': 20
}
|
src/src/openai_classifier.py
ADDED
|
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
OpenAI API Integration for Sign Language Classification
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from openai import OpenAI
|
| 6 |
+
import os
|
| 7 |
+
from typing import List, Dict, Any, Optional
|
| 8 |
+
import json
|
| 9 |
+
import time
|
| 10 |
+
from dotenv import load_dotenv
|
| 11 |
+
from .fallback_classifier import FallbackSignLanguageClassifier
|
| 12 |
+
|
| 13 |
+
# Load environment variables
|
| 14 |
+
load_dotenv()
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class SignLanguageClassifier:
|
| 18 |
+
"""
|
| 19 |
+
A class for classifying sign language gestures using OpenAI API.
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
def __init__(self, api_key: Optional[str] = None, model: str = "gpt-3.5-turbo"):
    """
    Initialize the SignLanguageClassifier.

    Args:
        api_key: OpenAI API key (if None, will use environment variable)
        model: OpenAI model to use for classification

    Raises:
        ValueError: If no key is supplied and OPENAI_API_KEY is unset.
    """
    self.api_key = api_key if api_key else os.getenv('OPENAI_API_KEY')
    self.model = model

    if not self.api_key:
        raise ValueError("OpenAI API key not provided. Set OPENAI_API_KEY environment variable or pass api_key parameter.")

    # Client built with the modern openai>=1.0 interface.
    self.client = OpenAI(api_key=self.api_key)

    # Client-side rate limiting state.
    self.last_request_time = 0
    self.min_request_interval = 1.0  # Minimum seconds between requests

    # Verbose logging of prompts and responses.
    self.debug = True

    # Pattern-based classifier used when the OpenAI call fails.
    self.fallback_classifier = FallbackSignLanguageClassifier()

    print("OpenAI classifier initialized with fallback support")
|
| 50 |
+
|
| 51 |
+
def _rate_limit(self):
    """Sleep just long enough to keep requests at least
    ``min_request_interval`` seconds apart, then record the send time."""
    elapsed = time.time() - self.last_request_time
    if elapsed < self.min_request_interval:
        time.sleep(self.min_request_interval - elapsed)
    self.last_request_time = time.time()
|
| 60 |
+
|
| 61 |
+
def classify_gesture(self, gesture_description: str,
                     sign_language: str = "ASL",
                     context: Optional[str] = None) -> Dict[str, Any]:
    """
    Classify a gesture using OpenAI API.

    On any OpenAI failure this falls back to the pattern-based
    classifier; only if the fallback also fails is an error dict
    (success=False) returned.

    Args:
        gesture_description: Textual description of the gesture
        sign_language: Type of sign language (ASL, ISL, etc.)
        context: Additional context for classification

    Returns:
        Dictionary containing classification results. On success it has
        'letter', 'word', 'confidence', 'description', 'raw_response'
        and 'success'=True; fallback results additionally carry
        'fallback_used' and 'openai_error'.
    """
    # Throttle so consecutive calls respect min_request_interval.
    self._rate_limit()

    # Create the prompt
    prompt = self._create_classification_prompt(gesture_description, sign_language, context)

    if self.debug:
        print(f"\n=== OpenAI Classification Debug ===")
        print(f"Input gesture description: {gesture_description}")
        print(f"Prompt sent to OpenAI: {prompt}")

    try:
        # Low temperature keeps predictions decisive/repeatable.
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": self._get_system_prompt(sign_language)},
                {"role": "user", "content": prompt}
            ],
            max_tokens=200,
            temperature=0.3,
            top_p=0.9
        )

        response_content = response.choices[0].message.content

        if self.debug:
            print(f"OpenAI response: {response_content}")

        # Parse the model output (JSON preferred, text as fallback).
        result = self._parse_response(response_content)
        result['raw_response'] = response_content
        result['success'] = True

        if self.debug:
            print(f"Parsed result: {result}")
            print("=== End Debug ===\n")

        return result

    except Exception as e:
        error_msg = str(e)
        if self.debug:
            print(f"OpenAI API Error: {error_msg}")
            print("Falling back to pattern-based classification...")

        # Use fallback classifier when OpenAI API fails
        try:
            fallback_result = self.fallback_classifier.classify_gesture(
                gesture_description, sign_language, context
            )
            # Mark the result so callers can tell which path produced it.
            fallback_result['fallback_used'] = True
            fallback_result['openai_error'] = error_msg

            if self.debug:
                print(f"Fallback result: {fallback_result}")
                print("=== End Debug ===\n")

            return fallback_result

        except Exception as fallback_error:
            if self.debug:
                print(f"Fallback also failed: {str(fallback_error)}")
                print("=== End Debug ===\n")

            # Both paths failed: return an explicit error payload rather
            # than raising, so the caller's loop keeps running.
            return {
                'success': False,
                'error': error_msg,
                'fallback_error': str(fallback_error),
                'letter': None,
                'word': None,
                'confidence': 0.0,
                'description': None
            }
|
| 146 |
+
|
| 147 |
+
def classify_sequence(self, gesture_descriptions: List[str],
                      sign_language: str = "ASL") -> Dict[str, Any]:
    """
    Classify a sequence of gestures to form words or sentences.

    Mirrors classify_gesture: OpenAI first, pattern-based fallback on
    failure, error dict (success=False) only if both fail.

    Args:
        gesture_descriptions: List of gesture descriptions
        sign_language: Type of sign language

    Returns:
        Dictionary containing sequence classification results
        ('word', 'sentence', 'confidence', 'individual_letters', plus
        'raw_response'/'success' on the OpenAI path).
    """
    # Throttle so consecutive calls respect min_request_interval.
    self._rate_limit()

    # Create sequence prompt
    prompt = self._create_sequence_prompt(gesture_descriptions, sign_language)

    try:
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": self._get_sequence_system_prompt(sign_language)},
                {"role": "user", "content": prompt}
            ],
            max_tokens=300,
            temperature=0.3,
            top_p=0.9
        )

        result = self._parse_sequence_response(response.choices[0].message.content)
        result['raw_response'] = response.choices[0].message.content
        result['success'] = True

        return result

    except Exception as e:
        # Use fallback for sequence classification too
        try:
            fallback_result = self.fallback_classifier.classify_sequence(
                gesture_descriptions, sign_language
            )
            # Mark the result so callers can tell which path produced it.
            fallback_result['fallback_used'] = True
            fallback_result['openai_error'] = str(e)
            return fallback_result

        except Exception as fallback_error:
            # Both paths failed: return an explicit error payload.
            return {
                'success': False,
                'error': str(e),
                'fallback_error': str(fallback_error),
                'word': None,
                'sentence': None,
                'confidence': 0.0
            }
|
| 201 |
+
|
| 202 |
+
def _get_system_prompt(self, sign_language: str) -> str:
    """Get system prompt for gesture classification.

    Fix: the prompt previously hard-coded "(American Sign Language)"
    after the language name even when ``sign_language`` was not ASL;
    it now names only the requested language.
    """
    return f"""You are an expert in {sign_language} recognition.
Your task is to provide ONE CLEAR PREDICTION for each hand gesture.

PRIORITY ORDER:
1. If it's a complete word sign (like "HELLO", "HUNGRY", "THANK YOU"), identify the WORD
2. If it's a letter/number sign, identify the LETTER or NUMBER
3. If uncertain, provide your best single guess

Respond in JSON format:
{{
    "letter": "A" or null,
    "word": "HUNGRY" or null,
    "confidence": 0.85,
    "description": "Brief explanation"
}}

IMPORTANT RULES:
- Provide either a letter OR a word, not both
- Words take priority over letters
- Be decisive - give your best single prediction
- Common words: HELLO, HUNGRY, THANK YOU, PLEASE, SORRY, YES, NO, I, YOU, LOVE, etc.
- Letters: A-Z, Numbers: 0-9
- Confidence should reflect your certainty (0.1 = very uncertain, 0.9 = very certain)

Focus on the most likely single interpretation of the gesture."""
|
| 229 |
+
|
| 230 |
+
def _get_sequence_system_prompt(self, sign_language: str) -> str:
    """Get system prompt for sequence classification."""
    # Template with doubled braces so .format leaves the JSON braces literal.
    template = """You are an expert in {lang} recognition specializing in interpreting sequences of gestures.
Your task is to analyze a sequence of hand gestures and determine if they form a word or sentence.

Respond in JSON format:
{{
    "word": "HELLO" or null,
    "sentence": "HELLO WORLD" or null,
    "confidence": 0.85,
    "individual_letters": ["H", "E", "L", "L", "O"]
}}

Consider:
- Sequential letter spelling
- Common {lang} words and phrases
- Context and flow between gestures"""
    return template.format(lang=sign_language)
|
| 247 |
+
|
| 248 |
+
def _create_classification_prompt(self, gesture_description: str,
                                  sign_language: str, context: Optional[str]) -> str:
    """Create enhanced prompt for single gesture classification.

    Fix: the opener previously hard-coded "ASL (American Sign Language)"
    and ignored the ``sign_language`` parameter entirely; it now names
    the requested language. (The pattern-hint section remains
    ASL-oriented by design.)
    """
    prompt = f"""You are an expert {sign_language} interpreter. Analyze this hand gesture and provide ONE CLEAR PREDICTION.

GESTURE DATA:
{gesture_description}

TASK: Identify what this gesture represents. Respond with EXACTLY ONE of these:
- A single letter (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z)
- A single number (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
- A complete word (HELLO, HUNGRY, THANK YOU, PLEASE, SORRY, YES, NO, I, YOU, LOVE, HELP, MORE, WATER, EAT, DRINK, etc.)

PRIORITY: If this could be a word sign, choose the WORD. If it's clearly a letter/number, choose that.

COMMON ASL PATTERNS:
- Closed fist = A, S, or numbers
- Open hand = 5, HELLO, or STOP
- Pointing = 1, I, or YOU
- Pinch gesture = F, 9, or SMALL

"""

    # Optional conversational context helps disambiguate similar signs.
    if context:
        prompt += f"Context: {context}\n\n"

    prompt += """Respond in this EXACT JSON format:
{
    "letter": "A" or null,
    "word": "HELLO" or null,
    "confidence": 0.85,
    "description": "Brief explanation"
}

Be decisive and confident in your single prediction."""

    return prompt
|
| 285 |
+
|
| 286 |
+
def _create_sequence_prompt(self, gesture_descriptions: List[str],
                            sign_language: str) -> str:
    """Create prompt for gesture sequence classification."""
    header = f"Analyze this sequence of {sign_language} hand gestures:\n\n"
    # One numbered line per gesture, in order of appearance.
    body = "".join(
        f"Gesture {num}: {desc}\n"
        for num, desc in enumerate(gesture_descriptions, 1)
    )
    footer = (
        f"\nWhat word or sentence do these {sign_language} gestures spell out when combined?\n"
        "Consider the sequence and flow of the gestures."
    )
    return header + body + footer
|
| 301 |
+
|
| 302 |
+
def _parse_response(self, response_text: str) -> Dict[str, Any]:
    """Parse OpenAI response for single gesture classification."""
    try:
        start = response_text.find('{')
        end = response_text.rfind('}')
        # Prefer strict JSON whenever the response contains braces.
        if start != -1 and end != -1:
            payload = json.loads(response_text[start:end + 1])
            # Normalize into the fixed result schema.
            return {
                'letter': payload.get('letter'),
                'word': payload.get('word'),
                'confidence': float(payload.get('confidence', 0.0)),
                'description': payload.get('description', ''),
            }
        return self._parse_text_response(response_text)
    except (json.JSONDecodeError, ValueError):
        # Malformed JSON or a non-numeric confidence: fall back to text parsing.
        return self._parse_text_response(response_text)
|
| 325 |
+
|
| 326 |
+
def _parse_sequence_response(self, response_text: str) -> Dict[str, Any]:
|
| 327 |
+
"""Parse OpenAI response for sequence classification."""
|
| 328 |
+
try:
|
| 329 |
+
if '{' in response_text and '}' in response_text:
|
| 330 |
+
json_start = response_text.find('{')
|
| 331 |
+
json_end = response_text.rfind('}') + 1
|
| 332 |
+
json_str = response_text[json_start:json_end]
|
| 333 |
+
result = json.loads(json_str)
|
| 334 |
+
|
| 335 |
+
return {
|
| 336 |
+
'word': result.get('word'),
|
| 337 |
+
'sentence': result.get('sentence'),
|
| 338 |
+
'confidence': float(result.get('confidence', 0.0)),
|
| 339 |
+
'individual_letters': result.get('individual_letters', [])
|
| 340 |
+
}
|
| 341 |
+
else:
|
| 342 |
+
return self._parse_sequence_text_response(response_text)
|
| 343 |
+
|
| 344 |
+
except (json.JSONDecodeError, ValueError):
|
| 345 |
+
return self._parse_sequence_text_response(response_text)
|
| 346 |
+
|
| 347 |
+
def _parse_text_response(self, response_text: str) -> Dict[str, Any]:
|
| 348 |
+
"""Enhanced fallback text parsing for single gesture."""
|
| 349 |
+
response_lower = response_text.lower()
|
| 350 |
+
|
| 351 |
+
# Common ASL words to look for
|
| 352 |
+
common_words = ['hello', 'hungry', 'thank you', 'please', 'sorry', 'yes', 'no',
|
| 353 |
+
'i', 'you', 'love', 'help', 'more', 'water', 'eat', 'drink',
|
| 354 |
+
'good', 'bad', 'happy', 'sad', 'stop', 'go', 'come', 'home']
|
| 355 |
+
|
| 356 |
+
# Look for words first (priority)
|
| 357 |
+
word = None
|
| 358 |
+
for w in common_words:
|
| 359 |
+
if w in response_lower:
|
| 360 |
+
word = w.upper()
|
| 361 |
+
break
|
| 362 |
+
|
| 363 |
+
# Look for letter patterns
|
| 364 |
+
letter = None
|
| 365 |
+
if not word: # Only look for letters if no word found
|
| 366 |
+
import re
|
| 367 |
+
letter_match = re.search(r'letter\s*[:\-]?\s*([a-z])', response_lower)
|
| 368 |
+
if letter_match:
|
| 369 |
+
letter = letter_match.group(1).upper()
|
| 370 |
+
|
| 371 |
+
# Look for word patterns
|
| 372 |
+
word = None
|
| 373 |
+
if 'word' in response_lower:
|
| 374 |
+
word_match = re.search(r'word\s*[:\-]?\s*([a-z]+)', response_lower)
|
| 375 |
+
if word_match:
|
| 376 |
+
word = word_match.group(1).upper()
|
| 377 |
+
|
| 378 |
+
return {
|
| 379 |
+
'letter': letter,
|
| 380 |
+
'word': word,
|
| 381 |
+
'confidence': 0.5, # Default confidence for text parsing
|
| 382 |
+
'description': response_text[:100] # First 100 chars
|
| 383 |
+
}
|
| 384 |
+
|
| 385 |
+
def _parse_sequence_text_response(self, response_text: str) -> Dict[str, Any]:
|
| 386 |
+
"""Fallback text parsing for sequence."""
|
| 387 |
+
return {
|
| 388 |
+
'word': None,
|
| 389 |
+
'sentence': None,
|
| 390 |
+
'confidence': 0.5,
|
| 391 |
+
'individual_letters': []
|
| 392 |
+
}
|
src/src/output_handler.py
ADDED
|
@@ -0,0 +1,391 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Output Display and Speech Synthesis Module
|
| 3 |
+
Handles text display and text-to-speech functionality
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import pyttsx3
|
| 7 |
+
import threading
|
| 8 |
+
import time
|
| 9 |
+
import os
|
| 10 |
+
from typing import List, Dict, Any, Optional, Callable
|
| 11 |
+
from queue import Queue, Empty
|
| 12 |
+
import json
|
| 13 |
+
from datetime import datetime
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class OutputHandler:
    """
    Handles text display and speech synthesis for sign language detection results.

    Maintains an in-memory transcript of detections/sequences, fans results out
    to registered display callbacks, and speaks results through a background
    pyttsx3 worker thread fed by a queue.
    """

    def __init__(self,
                 enable_speech: bool = True,
                 speech_rate: int = 150,
                 speech_volume: float = 0.9,
                 save_transcript: bool = True,
                 transcript_file: str = "sign_language_transcript.txt"):
        """
        Initialize the OutputHandler.

        Args:
            enable_speech: Whether to enable text-to-speech
            speech_rate: Speech rate (words per minute)
            speech_volume: Speech volume (0.0 to 1.0)
            save_transcript: Whether to save transcript to file
            transcript_file: Path to transcript file (a sibling .json is also written)
        """
        self.enable_speech = enable_speech
        self.speech_rate = speech_rate
        self.speech_volume = speech_volume
        self.save_transcript = save_transcript
        self.transcript_file = transcript_file

        # Initialize TTS engine state; the worker thread consumes speech_queue.
        self.tts_engine = None
        self.tts_thread = None
        self.speech_queue = Queue()
        # Flag read by speak() and set_speech_enabled(); written only by the
        # worker thread (no lock — single producer/consumer pattern).
        self.is_speaking = False

        # Transcript storage
        self.transcript = []
        self.current_session_start = datetime.now()

        # Display callbacks invoked on every detection/sequence result.
        self.display_callbacks = []

        # Initialize TTS if enabled
        if self.enable_speech:
            self._initialize_tts()

    def _initialize_tts(self) -> bool:
        """
        Initialize the text-to-speech engine and start the worker thread.

        On failure, disables speech for the rest of the session.

        Returns:
            True if initialized successfully, False otherwise
        """
        try:
            self.tts_engine = pyttsx3.init()

            # Set properties
            self.tts_engine.setProperty('rate', self.speech_rate)
            self.tts_engine.setProperty('volume', self.speech_volume)

            # Get available voices
            voices = self.tts_engine.getProperty('voices')
            if voices:
                # Try to use a female voice if available
                for voice in voices:
                    if 'female' in voice.name.lower() or 'woman' in voice.name.lower():
                        self.tts_engine.setProperty('voice', voice.id)
                        break
                else:
                    # Use first available voice (for/else: loop found no match)
                    self.tts_engine.setProperty('voice', voices[0].id)

            # Start TTS thread (daemon so it never blocks interpreter exit)
            self.tts_thread = threading.Thread(target=self._tts_worker, daemon=True)
            self.tts_thread.start()

            print("Text-to-speech initialized successfully")
            return True

        except Exception as e:
            print(f"Error initializing TTS: {e}")
            self.enable_speech = False
            return False

    def _tts_worker(self):
        """TTS worker thread that processes speech queue until a None sentinel."""
        while True:
            try:
                # 1s timeout keeps the loop responsive to the shutdown sentinel.
                text = self.speech_queue.get(timeout=1.0)
                if text is None:  # Shutdown signal
                    break

                self.is_speaking = True
                self.tts_engine.say(text)
                self.tts_engine.runAndWait()  # blocks until utterance finishes
                self.is_speaking = False

            except Empty:
                continue
            except Exception as e:
                print(f"Error in TTS worker: {e}")
                self.is_speaking = False

    def add_display_callback(self, callback: Callable):
        """
        Add a callback function for display updates.

        Args:
            callback: Called as callback(display_text, result_dict) for every
                detection or sequence result
        """
        self.display_callbacks.append(callback)

    def display_detection(self, detection: Dict[str, Any], speak: bool = True):
        """
        Display and optionally speak a gesture detection result.

        Silently ignores detections whose classification did not succeed.

        Args:
            detection: Detection result dictionary
            speak: Whether to speak the result
        """
        # Extract relevant information
        # NOTE(review): hand_label is unused here; _format_detection_text
        # re-reads it from the detection dict itself.
        hand_label = detection.get('hand_label', 'Unknown')
        classification = detection.get('classification', {})

        if not classification.get('success', False):
            return

        # Format display text
        display_text = self._format_detection_text(detection)

        # Add to transcript
        if self.save_transcript:
            self._add_to_transcript(detection, display_text)

        # Call display callbacks (isolated so one bad callback can't break others)
        for callback in self.display_callbacks:
            try:
                callback(display_text, detection)
            except Exception as e:
                print(f"Error in display callback: {e}")

        # Speak if enabled and requested
        if speak and self.enable_speech:
            speech_text = self._format_speech_text(detection)
            self.speak(speech_text)

        # Print to console
        print(f"[{datetime.now().strftime('%H:%M:%S')}] {display_text}")

    def display_sequence(self, sequence_result: Dict[str, Any], speak: bool = True):
        """
        Display and optionally speak a gesture sequence result.

        Args:
            sequence_result: Sequence classification result
            speak: Whether to speak the result
        """
        if not sequence_result.get('success', False):
            return

        # Format display text
        display_text = self._format_sequence_text(sequence_result)

        # Add to transcript (sequences are flushed to disk immediately)
        if self.save_transcript:
            self._add_sequence_to_transcript(sequence_result, display_text)

        # Call display callbacks
        for callback in self.display_callbacks:
            try:
                callback(display_text, sequence_result)
            except Exception as e:
                print(f"Error in display callback: {e}")

        # Speak if enabled and requested
        if speak and self.enable_speech:
            speech_text = self._format_sequence_speech_text(sequence_result)
            self.speak(speech_text)

        # Print to console
        print(f"[{datetime.now().strftime('%H:%M:%S')}] SEQUENCE: {display_text}")

    def speak(self, text: str):
        """
        Add text to speech queue.

        NOTE(review): text is silently DROPPED while an utterance is playing
        (is_speaking is True) — results arriving mid-utterance are never
        spoken. Confirm this backlog-avoidance behavior is intentional.

        Args:
            text: Text to speak
        """
        if self.enable_speech and not self.is_speaking:
            self.speech_queue.put(text)

    def _format_detection_text(self, detection: Dict[str, Any]) -> str:
        """Format a single detection result for on-screen display."""
        classification = detection.get('classification', {})
        hand_label = detection.get('hand_label', 'Unknown')

        parts = [f"{hand_label} hand:"]

        if classification.get('letter'):
            parts.append(f"Letter '{classification['letter']}'")

        if classification.get('word'):
            parts.append(f"Word '{classification['word']}'")

        confidence = classification.get('confidence', 0.0)
        if confidence > 0:
            parts.append(f"({confidence:.1%} confidence)")

        return " ".join(parts)

    def _format_sequence_text(self, sequence_result: Dict[str, Any]) -> str:
        """Format a sequence result for on-screen display ('|'-separated parts)."""
        parts = []

        if sequence_result.get('word'):
            parts.append(f"Word: '{sequence_result['word']}'")

        if sequence_result.get('sentence'):
            parts.append(f"Sentence: '{sequence_result['sentence']}'")

        if sequence_result.get('individual_letters'):
            letters = " ".join(sequence_result['individual_letters'])
            parts.append(f"Letters: {letters}")

        confidence = sequence_result.get('confidence', 0.0)
        if confidence > 0:
            parts.append(f"({confidence:.1%} confidence)")

        return " | ".join(parts)

    def _format_speech_text(self, detection: Dict[str, Any]) -> str:
        """Format a detection for speech: word preferred over letter."""
        classification = detection.get('classification', {})

        if classification.get('word'):
            return classification['word']
        elif classification.get('letter'):
            return f"Letter {classification['letter']}"
        else:
            return "Gesture detected"

    def _format_sequence_speech_text(self, sequence_result: Dict[str, Any]) -> str:
        """Format a sequence for speech: sentence preferred over word."""
        if sequence_result.get('sentence'):
            return sequence_result['sentence']
        elif sequence_result.get('word'):
            return sequence_result['word']
        else:
            return "Sequence detected"

    def _add_to_transcript(self, detection: Dict[str, Any], display_text: str):
        """Append a detection to the transcript; flush to disk every 10 entries."""
        transcript_entry = {
            'timestamp': datetime.now().isoformat(),
            'type': 'detection',
            'display_text': display_text,
            'detection': detection
        }
        self.transcript.append(transcript_entry)

        # Save to file periodically (batched to limit disk writes)
        if len(self.transcript) % 10 == 0:
            self._save_transcript()

    def _add_sequence_to_transcript(self, sequence_result: Dict[str, Any], display_text: str):
        """Append a sequence to the transcript and flush immediately."""
        transcript_entry = {
            'timestamp': datetime.now().isoformat(),
            'type': 'sequence',
            'display_text': display_text,
            'sequence_result': sequence_result
        }
        self.transcript.append(transcript_entry)
        self._save_transcript()

    def _save_transcript(self):
        """Save the transcript to disk as JSON plus a human-readable text file."""
        if not self.save_transcript:
            return

        try:
            # Create transcript data
            transcript_data = {
                'session_start': self.current_session_start.isoformat(),
                'last_updated': datetime.now().isoformat(),
                'entries': self.transcript
            }

            # Save as JSON (same basename as transcript_file, .json extension).
            # NOTE(review): assumes every value in the stored detection dicts
            # is JSON-serializable — confirm against producers.
            json_file = os.path.splitext(self.transcript_file)[0] + '.json'
            with open(json_file, 'w') as f:
                json.dump(transcript_data, f, indent=2)

            # Save as readable text (full rewrite each time, not an append)
            with open(self.transcript_file, 'w') as f:
                f.write(f"Sign Language Detection Transcript\n")
                f.write(f"Session started: {self.current_session_start.strftime('%Y-%m-%d %H:%M:%S')}\n")
                f.write(f"Last updated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
                f.write("=" * 50 + "\n\n")

                for entry in self.transcript:
                    timestamp = datetime.fromisoformat(entry['timestamp'])
                    f.write(f"[{timestamp.strftime('%H:%M:%S')}] {entry['display_text']}\n")

        except Exception as e:
            print(f"Error saving transcript: {e}")

    def get_transcript_summary(self) -> Dict[str, Any]:
        """
        Get summary of current transcript.

        Returns:
            Dict with total_entries, detections, sequences, deduplicated
            detected_words/detected_letters, and session_duration in seconds
        """
        if not self.transcript:
            return {'total_entries': 0, 'detections': 0, 'sequences': 0}

        detections = sum(1 for entry in self.transcript if entry['type'] == 'detection')
        sequences = sum(1 for entry in self.transcript if entry['type'] == 'sequence')

        # Extract detected words and letters
        detected_words = []
        detected_letters = []

        for entry in self.transcript:
            if entry['type'] == 'detection':
                classification = entry.get('detection', {}).get('classification', {})
                if classification.get('word'):
                    detected_words.append(classification['word'])
                if classification.get('letter'):
                    detected_letters.append(classification['letter'])
            elif entry['type'] == 'sequence':
                sequence_result = entry.get('sequence_result', {})
                if sequence_result.get('word'):
                    detected_words.append(sequence_result['word'])
                if sequence_result.get('sentence'):
                    # Sentences contribute their individual words
                    detected_words.extend(sequence_result['sentence'].split())

        return {
            'total_entries': len(self.transcript),
            'detections': detections,
            'sequences': sequences,
            'detected_words': list(set(detected_words)),      # deduplicated, unordered
            'detected_letters': list(set(detected_letters)),  # deduplicated, unordered
            'session_duration': (datetime.now() - self.current_session_start).total_seconds()
        }

    def clear_transcript(self):
        """Clear the current transcript and restart the session clock."""
        self.transcript = []
        self.current_session_start = datetime.now()
        print("Transcript cleared")

    def set_speech_enabled(self, enabled: bool):
        """Enable or disable speech synthesis (stops any in-flight utterance)."""
        self.enable_speech = enabled
        if not enabled and self.is_speaking:
            # Stop current speech
            if self.tts_engine:
                self.tts_engine.stop()

    def cleanup(self):
        """Clean up resources: flush transcript, stop the TTS worker and engine."""
        # Save final transcript
        if self.save_transcript and self.transcript:
            self._save_transcript()

        # Stop TTS
        if self.tts_thread:
            self.speech_queue.put(None)  # Shutdown signal
            self.tts_thread.join(timeout=2.0)

        if self.tts_engine:
            try:
                self.tts_engine.stop()
            except:  # NOTE(review): bare except hides real errors; best-effort shutdown
                pass
|
src/src/prediction_logger.py
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Comprehensive Prediction Logging System
|
| 3 |
+
|
| 4 |
+
This module provides detailed logging for the sign language prediction pipeline
|
| 5 |
+
to help identify where predictions are failing and track performance.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import json
|
| 9 |
+
import time
|
| 10 |
+
from datetime import datetime
|
| 11 |
+
from typing import Dict, Any, List, Optional
|
| 12 |
+
import os
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class PredictionLogger:
|
| 16 |
+
"""
|
| 17 |
+
Comprehensive logging system for sign language predictions.
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
def __init__(self, log_file: str = "prediction_logs.json", debug: bool = True):
|
| 21 |
+
"""
|
| 22 |
+
Initialize the prediction logger.
|
| 23 |
+
|
| 24 |
+
Args:
|
| 25 |
+
log_file: Path to the log file
|
| 26 |
+
debug: Whether to print debug information
|
| 27 |
+
"""
|
| 28 |
+
self.log_file = log_file
|
| 29 |
+
self.debug = debug
|
| 30 |
+
self.session_id = f"session_{int(time.time())}"
|
| 31 |
+
self.logs = []
|
| 32 |
+
|
| 33 |
+
if self.debug:
|
| 34 |
+
print(f"🔍 Prediction Logger initialized - Session: {self.session_id}")
|
| 35 |
+
|
| 36 |
+
def log_hand_detection(self, image_info: Dict[str, Any], hands_detected: int,
|
| 37 |
+
detection_confidence: List[float] = None) -> str:
|
| 38 |
+
"""
|
| 39 |
+
Log hand detection results.
|
| 40 |
+
|
| 41 |
+
Args:
|
| 42 |
+
image_info: Information about the processed image
|
| 43 |
+
hands_detected: Number of hands detected
|
| 44 |
+
detection_confidence: List of confidence scores for detected hands
|
| 45 |
+
|
| 46 |
+
Returns:
|
| 47 |
+
Log entry ID
|
| 48 |
+
"""
|
| 49 |
+
log_entry = {
|
| 50 |
+
"id": f"hand_det_{int(time.time() * 1000)}",
|
| 51 |
+
"timestamp": datetime.now().isoformat(),
|
| 52 |
+
"session_id": self.session_id,
|
| 53 |
+
"stage": "hand_detection",
|
| 54 |
+
"image_info": image_info,
|
| 55 |
+
"hands_detected": hands_detected,
|
| 56 |
+
"detection_confidence": detection_confidence or [],
|
| 57 |
+
"success": hands_detected > 0
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
self.logs.append(log_entry)
|
| 61 |
+
|
| 62 |
+
if self.debug:
|
| 63 |
+
status = "✅" if hands_detected > 0 else "❌"
|
| 64 |
+
print(f"{status} Hand Detection: {hands_detected} hands detected")
|
| 65 |
+
if detection_confidence:
|
| 66 |
+
for i, conf in enumerate(detection_confidence):
|
| 67 |
+
print(f" Hand {i+1}: {conf:.1%} confidence")
|
| 68 |
+
|
| 69 |
+
return log_entry["id"]
|
| 70 |
+
|
| 71 |
+
def log_gesture_extraction(self, hand_data: Dict[str, Any],
|
| 72 |
+
gesture_description: str) -> str:
|
| 73 |
+
"""
|
| 74 |
+
Log gesture extraction results.
|
| 75 |
+
|
| 76 |
+
Args:
|
| 77 |
+
hand_data: Hand landmark data
|
| 78 |
+
gesture_description: Generated gesture description
|
| 79 |
+
|
| 80 |
+
Returns:
|
| 81 |
+
Log entry ID
|
| 82 |
+
"""
|
| 83 |
+
log_entry = {
|
| 84 |
+
"id": f"gest_ext_{int(time.time() * 1000)}",
|
| 85 |
+
"timestamp": datetime.now().isoformat(),
|
| 86 |
+
"session_id": self.session_id,
|
| 87 |
+
"stage": "gesture_extraction",
|
| 88 |
+
"hand_label": hand_data.get('label', 'Unknown'),
|
| 89 |
+
"hand_confidence": hand_data.get('confidence', 0.0),
|
| 90 |
+
"gesture_description": gesture_description,
|
| 91 |
+
"description_length": len(gesture_description),
|
| 92 |
+
"success": len(gesture_description) > 0
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
self.logs.append(log_entry)
|
| 96 |
+
|
| 97 |
+
if self.debug:
|
| 98 |
+
print(f"✅ Gesture Extraction: {len(gesture_description)} chars description")
|
| 99 |
+
print(f" Hand: {hand_data.get('label', 'Unknown')} ({hand_data.get('confidence', 0):.1%})")
|
| 100 |
+
|
| 101 |
+
return log_entry["id"]
|
| 102 |
+
|
| 103 |
+
def log_ai_classification(self, gesture_description: str, ai_provider: str,
|
| 104 |
+
response: Dict[str, Any], success: bool,
|
| 105 |
+
error_message: str = None) -> str:
|
| 106 |
+
"""
|
| 107 |
+
Log AI classification attempts.
|
| 108 |
+
|
| 109 |
+
Args:
|
| 110 |
+
gesture_description: Input gesture description
|
| 111 |
+
ai_provider: AI provider used (gemini, openai, etc.)
|
| 112 |
+
response: AI response data
|
| 113 |
+
success: Whether the classification succeeded
|
| 114 |
+
error_message: Error message if failed
|
| 115 |
+
|
| 116 |
+
Returns:
|
| 117 |
+
Log entry ID
|
| 118 |
+
"""
|
| 119 |
+
log_entry = {
|
| 120 |
+
"id": f"ai_class_{int(time.time() * 1000)}",
|
| 121 |
+
"timestamp": datetime.now().isoformat(),
|
| 122 |
+
"session_id": self.session_id,
|
| 123 |
+
"stage": "ai_classification",
|
| 124 |
+
"ai_provider": ai_provider,
|
| 125 |
+
"input_description": gesture_description,
|
| 126 |
+
"response": response,
|
| 127 |
+
"success": success,
|
| 128 |
+
"error_message": error_message,
|
| 129 |
+
"prediction": response.get('word') or response.get('letter') if success else None,
|
| 130 |
+
"confidence": response.get('confidence', 0.0) if success else 0.0
|
| 131 |
+
}
|
| 132 |
+
|
| 133 |
+
self.logs.append(log_entry)
|
| 134 |
+
|
| 135 |
+
if self.debug:
|
| 136 |
+
status = "✅" if success else "❌"
|
| 137 |
+
if success:
|
| 138 |
+
prediction = response.get('word') or response.get('letter') or 'No prediction'
|
| 139 |
+
confidence = response.get('confidence', 0.0)
|
| 140 |
+
print(f"{status} AI Classification ({ai_provider}): {prediction} ({confidence:.1%})")
|
| 141 |
+
else:
|
| 142 |
+
print(f"{status} AI Classification ({ai_provider}) Failed: {error_message}")
|
| 143 |
+
|
| 144 |
+
return log_entry["id"]
|
| 145 |
+
|
| 146 |
+
def log_fallback_classification(self, gesture_description: str,
|
| 147 |
+
response: Dict[str, Any], success: bool) -> str:
|
| 148 |
+
"""
|
| 149 |
+
Log fallback classification results.
|
| 150 |
+
|
| 151 |
+
Args:
|
| 152 |
+
gesture_description: Input gesture description
|
| 153 |
+
response: Fallback classifier response
|
| 154 |
+
success: Whether the classification succeeded
|
| 155 |
+
|
| 156 |
+
Returns:
|
| 157 |
+
Log entry ID
|
| 158 |
+
"""
|
| 159 |
+
log_entry = {
|
| 160 |
+
"id": f"fallback_{int(time.time() * 1000)}",
|
| 161 |
+
"timestamp": datetime.now().isoformat(),
|
| 162 |
+
"session_id": self.session_id,
|
| 163 |
+
"stage": "fallback_classification",
|
| 164 |
+
"input_description": gesture_description,
|
| 165 |
+
"response": response,
|
| 166 |
+
"success": success,
|
| 167 |
+
"prediction": response.get('word') or response.get('letter') if success else None,
|
| 168 |
+
"confidence": response.get('confidence', 0.0) if success else 0.0
|
| 169 |
+
}
|
| 170 |
+
|
| 171 |
+
self.logs.append(log_entry)
|
| 172 |
+
|
| 173 |
+
if self.debug:
|
| 174 |
+
status = "✅" if success else "❌"
|
| 175 |
+
if success:
|
| 176 |
+
prediction = response.get('word') or response.get('letter') or 'No prediction'
|
| 177 |
+
confidence = response.get('confidence', 0.0)
|
| 178 |
+
print(f"{status} Fallback Classification: {prediction} ({confidence:.1%})")
|
| 179 |
+
else:
|
| 180 |
+
print(f"{status} Fallback Classification Failed")
|
| 181 |
+
|
| 182 |
+
return log_entry["id"]
|
| 183 |
+
|
| 184 |
+
def log_final_prediction(self, file_path: str, final_prediction: str,
|
| 185 |
+
confidence: float, method_used: str,
|
| 186 |
+
processing_time: float) -> str:
|
| 187 |
+
"""
|
| 188 |
+
Log final prediction results.
|
| 189 |
+
|
| 190 |
+
Args:
|
| 191 |
+
file_path: Path to the processed file
|
| 192 |
+
final_prediction: Final prediction result
|
| 193 |
+
confidence: Prediction confidence
|
| 194 |
+
method_used: Method that provided the final prediction
|
| 195 |
+
processing_time: Total processing time in seconds
|
| 196 |
+
|
| 197 |
+
Returns:
|
| 198 |
+
Log entry ID
|
| 199 |
+
"""
|
| 200 |
+
log_entry = {
|
| 201 |
+
"id": f"final_{int(time.time() * 1000)}",
|
| 202 |
+
"timestamp": datetime.now().isoformat(),
|
| 203 |
+
"session_id": self.session_id,
|
| 204 |
+
"stage": "final_prediction",
|
| 205 |
+
"file_path": file_path,
|
| 206 |
+
"final_prediction": final_prediction,
|
| 207 |
+
"confidence": confidence,
|
| 208 |
+
"method_used": method_used,
|
| 209 |
+
"processing_time": processing_time,
|
| 210 |
+
"success": final_prediction is not None and final_prediction != "No prediction"
|
| 211 |
+
}
|
| 212 |
+
|
| 213 |
+
self.logs.append(log_entry)
|
| 214 |
+
|
| 215 |
+
if self.debug:
|
| 216 |
+
status = "🎯" if log_entry["success"] else "❌"
|
| 217 |
+
print(f"{status} Final Prediction: {final_prediction} ({confidence:.1%}) via {method_used}")
|
| 218 |
+
print(f" Processing time: {processing_time:.2f}s")
|
| 219 |
+
|
| 220 |
+
return log_entry["id"]
|
| 221 |
+
|
| 222 |
+
def get_session_summary(self) -> Dict[str, Any]:
|
| 223 |
+
"""
|
| 224 |
+
Get a summary of the current session.
|
| 225 |
+
|
| 226 |
+
Returns:
|
| 227 |
+
Session summary statistics
|
| 228 |
+
"""
|
| 229 |
+
total_predictions = len([log for log in self.logs if log["stage"] == "final_prediction"])
|
| 230 |
+
successful_predictions = len([log for log in self.logs
|
| 231 |
+
if log["stage"] == "final_prediction" and log["success"]])
|
| 232 |
+
|
| 233 |
+
hand_detections = len([log for log in self.logs if log["stage"] == "hand_detection"])
|
| 234 |
+
successful_hand_detections = len([log for log in self.logs
|
| 235 |
+
if log["stage"] == "hand_detection" and log["success"]])
|
| 236 |
+
|
| 237 |
+
ai_attempts = len([log for log in self.logs if log["stage"] == "ai_classification"])
|
| 238 |
+
successful_ai = len([log for log in self.logs
|
| 239 |
+
if log["stage"] == "ai_classification" and log["success"]])
|
| 240 |
+
|
| 241 |
+
fallback_attempts = len([log for log in self.logs if log["stage"] == "fallback_classification"])
|
| 242 |
+
|
| 243 |
+
summary = {
|
| 244 |
+
"session_id": self.session_id,
|
| 245 |
+
"total_files_processed": total_predictions,
|
| 246 |
+
"successful_predictions": successful_predictions,
|
| 247 |
+
"prediction_success_rate": successful_predictions / total_predictions if total_predictions > 0 else 0,
|
| 248 |
+
"hand_detection_success_rate": successful_hand_detections / hand_detections if hand_detections > 0 else 0,
|
| 249 |
+
"ai_classification_success_rate": successful_ai / ai_attempts if ai_attempts > 0 else 0,
|
| 250 |
+
"fallback_usage_rate": fallback_attempts / total_predictions if total_predictions > 0 else 0,
|
| 251 |
+
"total_logs": len(self.logs)
|
| 252 |
+
}
|
| 253 |
+
|
| 254 |
+
return summary
|
| 255 |
+
|
| 256 |
+
def save_logs(self) -> bool:
    """
    Save logs to file.

    Writes the session summary plus every raw log entry to
    ``self.log_file`` as indented JSON.

    Returns:
        True if successful, False otherwise (failures are reported on
        stdout only when ``self.debug`` is set; they never raise).
    """
    try:
        # Explicit utf-8: the default locale encoding (e.g. cp1252 on
        # Windows) can fail on non-ASCII text in log entries.
        with open(self.log_file, 'w', encoding='utf-8') as f:
            json.dump({
                "session_summary": self.get_session_summary(),
                "logs": self.logs
            }, f, indent=2)

        if self.debug:
            print(f"💾 Logs saved to {self.log_file}")

        return True
    except Exception as e:
        # Best-effort persistence: report and signal failure, don't crash.
        if self.debug:
            print(f"❌ Failed to save logs: {e}")
        return False
|
| 278 |
+
|
| 279 |
+
def print_summary(self):
    """Print a formatted summary of the current session to stdout."""
    stats = self.get_session_summary()
    divider = "=" * 50

    # Assemble every output line first, then emit them in order.
    report = [
        "\n" + divider,
        "📊 PREDICTION SESSION SUMMARY",
        divider,
        f"Session ID: {stats['session_id']}",
        f"Files Processed: {stats['total_files_processed']}",
        f"Successful Predictions: {stats['successful_predictions']}",
        f"Prediction Success Rate: {stats['prediction_success_rate']:.1%}",
        f"Hand Detection Success Rate: {stats['hand_detection_success_rate']:.1%}",
        f"AI Classification Success Rate: {stats['ai_classification_success_rate']:.1%}",
        f"Fallback Usage Rate: {stats['fallback_usage_rate']:.1%}",
        f"Total Log Entries: {stats['total_logs']}",
        divider,
    ]
    for line in report:
        print(line)
|
src/src/visualization_utils.py
ADDED
|
@@ -0,0 +1,359 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Visualization utilities for enhanced result display
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import cv2
|
| 6 |
+
import numpy as np
|
| 7 |
+
import matplotlib.pyplot as plt
|
| 8 |
+
import matplotlib.patches as patches
|
| 9 |
+
from matplotlib.patches import Circle
|
| 10 |
+
import plotly.graph_objects as go
|
| 11 |
+
import plotly.express as px
|
| 12 |
+
from plotly.subplots import make_subplots
|
| 13 |
+
from typing import List, Dict, Any, Tuple
|
| 14 |
+
import pandas as pd
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class HandLandmarkVisualizer:
    """
    Enhanced visualization for hand landmarks and gesture analysis.

    Renders 21-point MediaPipe-style hand landmarks onto images (OpenCV),
    and builds interactive Plotly figures: a 3D landmark plot, a gesture
    feature radar chart, and a confidence gauge.
    """

    def __init__(self):
        """Initialize the connection topology, colors, and finger index ranges."""
        # MediaPipe hand landmark connections (pairs of landmark indices,
        # 0 = wrist, 1-4 thumb, 5-8 index, 9-12 middle, 13-16 ring, 17-20 pinky)
        self.hand_connections = [
            (0, 1), (1, 2), (2, 3), (3, 4),        # Thumb
            (0, 5), (5, 6), (6, 7), (7, 8),        # Index finger
            (0, 9), (9, 10), (10, 11), (11, 12),   # Middle finger
            (0, 13), (13, 14), (14, 15), (15, 16), # Ring finger
            (0, 17), (17, 18), (18, 19), (19, 20), # Pinky
            (5, 9), (9, 13), (13, 17)              # Palm connections
        ]

        # Color scheme for different parts.
        # NOTE(review): these triples are passed straight to cv2, which uses
        # BGR ordering — so (255, 0, 0) renders blue on screen, not red as
        # the labels suggest. Confirm intended channel order.
        self.colors = {
            'thumb': (255, 0, 0),     # Red
            'index': (0, 255, 0),     # Green
            'middle': (0, 0, 255),    # Blue
            'ring': (255, 255, 0),    # Yellow
            'pinky': (255, 0, 255),   # Magenta
            'palm': (0, 255, 255),    # Cyan
            'wrist': (128, 128, 128)  # Gray
        }

        # Finger landmark ranges: maps each finger name to the landmark
        # indices belonging to it (used for per-finger coloring).
        self.finger_ranges = {
            'thumb': range(1, 5),
            'index': range(5, 9),
            'middle': range(9, 13),
            'ring': range(13, 17),
            'pinky': range(17, 21),
            'wrist': [0]
        }

    def draw_enhanced_landmarks(self, image: np.ndarray,
                                hand_landmarks: List[Dict[str, Any]]) -> np.ndarray:
        """
        Draw enhanced hand landmarks with color coding and connections.

        Args:
            image: Input image (H x W x C array; not modified — a copy is drawn on)
            hand_landmarks: List of hand landmark data; each entry carries
                'landmarks' (21 dicts with normalized 'x'/'y' in [0, 1])
                and a 'label' string (e.g. "Left"/"Right")

        Returns:
            Image copy with per-finger colored connections, landmark dots,
            and a per-hand text label drawn on it
        """
        annotated_image = image.copy()
        height, width = image.shape[:2]

        for hand_data in hand_landmarks:
            landmarks = hand_data['landmarks']
            hand_label = hand_data['label']

            # Convert normalized coordinates to pixel coordinates
            landmark_points = []
            for landmark in landmarks:
                x = int(landmark['x'] * width)
                y = int(landmark['y'] * height)
                landmark_points.append((x, y))

            # Draw connections
            for connection in self.hand_connections:
                start_idx, end_idx = connection
                start_point = landmark_points[start_idx]
                end_point = landmark_points[end_idx]

                # Determine color based on finger
                color = self._get_connection_color(start_idx, end_idx)
                cv2.line(annotated_image, start_point, end_point, color, 2)

            # Draw landmark points: filled colored dot plus a thin white ring
            for i, point in enumerate(landmark_points):
                color = self._get_landmark_color(i)
                cv2.circle(annotated_image, point, 4, color, -1)
                cv2.circle(annotated_image, point, 6, (255, 255, 255), 1)

            # Add hand label near the wrist (landmark 0), drawn twice
            # (thick white under thin black) for an outline effect
            if landmark_points:
                label_pos = (landmark_points[0][0] - 50, landmark_points[0][1] - 20)
                cv2.putText(annotated_image, f"{hand_label} Hand", label_pos,
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
                cv2.putText(annotated_image, f"{hand_label} Hand", label_pos,
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 1)

        return annotated_image

    def _get_landmark_color(self, landmark_idx: int) -> Tuple[int, int, int]:
        """Get the display color for a specific landmark index (gray if unknown)."""
        for finger, indices in self.finger_ranges.items():
            if landmark_idx in indices:
                return self.colors[finger]
        return (128, 128, 128)  # Default gray

    def _get_connection_color(self, start_idx: int, end_idx: int) -> Tuple[int, int, int]:
        """Get color for a connection between landmarks."""
        # Use the color of the finger that both landmarks belong to;
        # cross-finger connections (e.g. wrist-to-finger) fall back to palm
        for finger, indices in self.finger_ranges.items():
            if start_idx in indices and end_idx in indices:
                return self.colors[finger]
        return self.colors['palm']  # Default to palm color

    def create_3d_hand_plot(self, hand_landmarks: Dict[str, Any]) -> go.Figure:
        """
        Create a 3D visualization of hand landmarks.

        Args:
            hand_landmarks: Hand landmark data with 'landmarks' (dicts
                carrying 'x'/'y'/'z') and a 'label' string

        Returns:
            Plotly 3D figure: depth-colored landmark markers plus gray
            connection segments
        """
        landmarks = hand_landmarks['landmarks']

        # Extract coordinates
        x_coords = [landmark['x'] for landmark in landmarks]
        y_coords = [-landmark['y'] for landmark in landmarks]  # Flip Y for proper orientation
        z_coords = [landmark['z'] for landmark in landmarks]

        # Create 3D scatter plot
        fig = go.Figure()

        # Add landmark points
        fig.add_trace(go.Scatter3d(
            x=x_coords,
            y=y_coords,
            z=z_coords,
            mode='markers',
            marker=dict(
                size=8,
                color=z_coords,
                colorscale='Viridis',
                showscale=True,
                colorbar=dict(title="Depth")
            ),
            text=[f"Landmark {i}" for i in range(len(landmarks))],
            name="Hand Landmarks"
        ))

        # Add connections (one trace per segment, hidden from the legend)
        for connection in self.hand_connections:
            start_idx, end_idx = connection
            fig.add_trace(go.Scatter3d(
                x=[x_coords[start_idx], x_coords[end_idx]],
                y=[y_coords[start_idx], y_coords[end_idx]],
                z=[z_coords[start_idx], z_coords[end_idx]],
                mode='lines',
                line=dict(color='rgba(100, 100, 100, 0.6)', width=3),
                showlegend=False
            ))

        # Update layout
        fig.update_layout(
            title=f"3D Hand Landmarks - {hand_landmarks['label']} Hand",
            scene=dict(
                xaxis_title="X",
                yaxis_title="Y",
                zaxis_title="Z (Depth)",
                camera=dict(
                    eye=dict(x=1.5, y=1.5, z=1.5)
                )
            ),
            width=600,
            height=500
        )

        return fig

    def create_gesture_feature_radar(self, gesture_features: Dict[str, float]) -> go.Figure:
        """
        Create a radar chart for gesture features.

        Args:
            gesture_features: Dictionary of gesture features; missing keys
                default to 0. Angle features are assumed to be in degrees
                (divided by 180) — TODO confirm against the extractor.

        Returns:
            Plotly radar chart figure with all axes normalized to [0, 1]
        """
        # Normalize features for radar chart
        features = ['Thumb Ext.', 'Index Ext.', 'Middle Ext.', 'Ring Ext.', 'Pinky Ext.',
                    'Thumb-Index Angle', 'Palm Orientation', 'Hand Openness']

        # Extract and normalize values
        values = [
            gesture_features.get('thumb_extended', 0),
            gesture_features.get('index_extended', 0),
            gesture_features.get('middle_extended', 0),
            gesture_features.get('ring_extended', 0),
            gesture_features.get('pinky_extended', 0),
            gesture_features.get('thumb_index_angle', 0) / 180,  # Normalize angle
            gesture_features.get('palm_orientation', 0) / 180,   # Normalize angle
            gesture_features.get('hand_openness', 0)
        ]

        fig = go.Figure()

        fig.add_trace(go.Scatterpolar(
            r=values,
            theta=features,
            fill='toself',
            name='Gesture Features',
            line_color='rgb(46, 134, 171)'
        ))

        fig.update_layout(
            polar=dict(
                radialaxis=dict(
                    visible=True,
                    range=[0, 1]
                )
            ),
            title="Gesture Feature Analysis",
            showlegend=True
        )

        return fig

    def create_confidence_gauge(self, confidence: float, title: str = "Confidence") -> go.Figure:
        """
        Create a gauge chart for confidence scores.

        Args:
            confidence: Confidence value (0-1); displayed as a percentage
            title: Title for the gauge

        Returns:
            Plotly gauge figure (delta reference at 80%, red threshold at 90%)
        """
        fig = go.Figure(go.Indicator(
            mode="gauge+number+delta",
            value=confidence * 100,
            domain={'x': [0, 1], 'y': [0, 1]},
            title={'text': title},
            delta={'reference': 80},
            gauge={
                'axis': {'range': [None, 100]},
                'bar': {'color': "darkblue"},
                'steps': [
                    {'range': [0, 50], 'color': "lightgray"},
                    {'range': [50, 80], 'color': "gray"}
                ],
                'threshold': {
                    'line': {'color': "red", 'width': 4},
                    'thickness': 0.75,
                    'value': 90
                }
            }
        ))

        fig.update_layout(height=300)
        return fig
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
def create_comparison_view(original_image: np.ndarray,
                           annotated_image: np.ndarray) -> np.ndarray:
    """
    Build a side-by-side comparison of the original and annotated images.

    Args:
        original_image: Original input image
        annotated_image: Image with landmarks drawn

    Returns:
        Single image with both panels stacked horizontally, labeled
        "Original" (left) and "Detected" (right)
    """
    # Both panels must share a height before they can be stacked.
    target_height = max(original_image.shape[0], annotated_image.shape[0])

    def _fit_height(img: np.ndarray) -> np.ndarray:
        # Scale width proportionally so the aspect ratio is preserved.
        if img.shape[0] == target_height:
            return img
        aspect_ratio = img.shape[1] / img.shape[0]
        new_width = int(target_height * aspect_ratio)
        return cv2.resize(img, (new_width, target_height))

    original_image = _fit_height(original_image)
    annotated_image = _fit_height(annotated_image)

    comparison = np.hstack([original_image, annotated_image])

    # Caption each panel; the right-hand label is offset by the left
    # panel's width so it lands over the annotated image.
    cv2.putText(comparison, "Original", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
    cv2.putText(comparison, "Detected", (original_image.shape[1] + 10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

    return comparison
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
def create_processing_timeline(frame_detections: List[Dict[str, Any]]) -> go.Figure:
    """
    Create a timeline visualization for video processing results.

    Args:
        frame_detections: Per-frame detection records, each carrying
            'timestamp', 'hands_detected' and 'frame_number'

    Returns:
        Plotly scatter figure of hand count over time (an empty figure
        when there are no detections)
    """
    # Nothing to plot — return a blank canvas rather than failing.
    if not frame_detections:
        return go.Figure()

    # Pull each series out of the records.
    times = [rec['timestamp'] for rec in frame_detections]
    counts = [rec['hands_detected'] for rec in frame_detections]
    labels = [f"Frame {rec['frame_number']}" for rec in frame_detections]

    hover = ("<b>Frame %{text}</b><br>"
             "Time: %{x:.1f}s<br>"
             "Hands: %{y}<br>"
             "<extra></extra>")

    # Markers are colored by the number of hands seen in that frame.
    timeline = go.Figure(go.Scatter(
        x=times,
        y=counts,
        mode='markers+lines',
        name='Hands Detected',
        marker=dict(
            size=8,
            color=counts,
            colorscale='Viridis',
            showscale=True,
            colorbar=dict(title="Hands")
        ),
        text=labels,
        hovertemplate=hover
    ))

    timeline.update_layout(
        title="Hand Detection Timeline",
        xaxis_title="Time (seconds)",
        yaxis_title="Number of Hands Detected",
        hovermode='closest'
    )

    return timeline
|