Spaces:
ckcl
/
Build error

ckcl commited on
Commit
5e2919a
·
verified ·
1 Parent(s): 15fdf57

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +195 -1013
app.py CHANGED
@@ -1,1082 +1,264 @@
1
  import gradio as gr
 
 
2
  import numpy as np
3
  import cv2
4
  from PIL import Image
5
  import io
6
  import os
7
- import json
8
  import time
9
- import argparse
10
- import tensorflow as tf
11
- from tensorflow import keras
12
- import math
13
- from collections import deque
14
- from mtcnn import MTCNN
15
-
16
- class SpeedDetector:
17
- def __init__(self, history_size=30):
18
- self.speed_history = deque(maxlen=history_size)
19
- self.last_update_time = None
20
- self.current_speed = 0
21
- self.speed_change_threshold = 5 # km/h
22
- self.abnormal_speed_changes = 0
23
- self.speed_deviation_sum = 0
24
- self.speed_change_score = 0
25
-
26
- # For optical flow speed estimation
27
- self.prev_gray = None
28
- self.prev_points = None
29
- self.frame_idx = 0
30
- self.speed_estimate = 60 # Initial estimate
31
-
32
- def update_speed(self, speed_km_h):
33
- """Update with current speed in km/h"""
34
- current_time = time.time()
35
-
36
- # Add to history
37
- self.speed_history.append(speed_km_h)
38
- self.current_speed = speed_km_h
39
-
40
- # Not enough data yet
41
- if len(self.speed_history) < 5:
42
- return 0
43
-
44
- # Calculate speed variation metrics
45
- speed_arr = np.array(self.speed_history)
46
-
47
- # 1. Standard deviation of speed
48
- speed_std = np.std(speed_arr)
49
-
50
- # 2. Detect abrupt changes
51
- for i in range(1, len(speed_arr)):
52
- change = abs(speed_arr[i] - speed_arr[i-1])
53
- if change >= self.speed_change_threshold:
54
- self.abnormal_speed_changes += 1
55
-
56
- # 3. Calculate average rate of change
57
- changes = np.abs(np.diff(speed_arr))
58
- avg_change = np.mean(changes) if len(changes) > 0 else 0
59
-
60
- # Combine into a score (0-1 range)
61
- self.speed_deviation_sum = min(5, speed_std) / 5 # Normalize to 0-1
62
- abnormal_change_factor = min(1, self.abnormal_speed_changes / 5)
63
- avg_change_factor = min(1, avg_change / self.speed_change_threshold)
64
-
65
- # Weighted combination
66
- self.speed_change_score = (
67
- 0.4 * self.speed_deviation_sum +
68
- 0.4 * abnormal_change_factor +
69
- 0.2 * avg_change_factor
70
- )
71
-
72
- return self.speed_change_score
73
-
74
- def detect_speed_from_frame(self, frame):
75
- """Detect speed from video frame using optical flow"""
76
- if frame is None:
77
- return self.current_speed
78
-
79
- # Convert frame to grayscale
80
- gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
81
-
82
- # For the first frame, initialize points to track
83
- if self.prev_gray is None or self.frame_idx % 30 == 0: # Reset tracking points every 30 frames
84
- # Detect good features to track
85
- mask = np.zeros_like(gray)
86
- # Focus on the lower portion of the frame (road)
87
- h, w = gray.shape
88
- mask[h//2:, :] = 255
89
-
90
- corners = cv2.goodFeaturesToTrack(gray, maxCorners=100, qualityLevel=0.01, minDistance=10, mask=mask)
91
- if corners is not None and len(corners) > 0:
92
- self.prev_points = corners
93
- self.prev_gray = gray.copy()
94
- else:
95
- # No good points to track
96
- self.frame_idx += 1
97
- return self.current_speed
98
-
99
- # Calculate optical flow if we have previous points
100
- if self.prev_gray is not None and self.prev_points is not None:
101
- # Calculate optical flow
102
- new_points, status, _ = cv2.calcOpticalFlowPyrLK(self.prev_gray, gray, self.prev_points, None)
103
-
104
- # Filter only valid points
105
- if new_points is not None and status is not None:
106
- good_new = new_points[status == 1]
107
- good_old = self.prev_points[status == 1]
108
-
109
- # Calculate flow magnitude
110
- if len(good_new) > 0 and len(good_old) > 0:
111
- flow_magnitudes = np.sqrt(
112
- np.sum((good_new - good_old)**2, axis=1)
113
- )
114
- avg_flow = np.mean(flow_magnitudes) if len(flow_magnitudes) > 0 else 0
115
-
116
- # Map optical flow to speed change
117
- # Higher flow = faster movement
118
- # This is a simplified mapping and would need calibration for real-world use
119
- flow_threshold = 1.0 # Adjust based on testing
120
-
121
- if avg_flow > flow_threshold:
122
- # Movement detected, estimate acceleration
123
- speed_change = min(5, max(-5, (avg_flow - flow_threshold) * 2))
124
-
125
- # Add some temporal smoothing to avoid sudden changes
126
- speed_change = speed_change * 0.3 # Reduce magnitude for smoother change
127
- else:
128
- # Minimal movement, slight deceleration (coasting)
129
- speed_change = -0.1
130
-
131
- # Update speed with detected change
132
- self.speed_estimate += speed_change
133
- # Keep speed in reasonable range
134
- self.speed_estimate = max(40, min(120, self.speed_estimate))
135
-
136
- # Update tracking points
137
- self.prev_points = good_new.reshape(-1, 1, 2)
138
-
139
- # Update previous gray frame
140
- self.prev_gray = gray.copy()
141
-
142
- self.frame_idx += 1
143
-
144
- # Check for dashboard speedometer (would require more sophisticated OCR in a real system)
145
- # For now, just use our estimated speed
146
- detected_speed = self.speed_estimate
147
-
148
- # Update current speed and trigger speed change detection
149
- self.update_speed(detected_speed)
150
-
151
- return detected_speed
152
-
153
- def get_speed_change_score(self):
154
- """Return a score from 0-1 indicating abnormal speed changes"""
155
- return self.speed_change_score
156
-
157
- def reset(self):
158
- """Reset the detector state"""
159
- self.speed_history.clear()
160
- self.abnormal_speed_changes = 0
161
- self.speed_deviation_sum = 0
162
- self.speed_change_score = 0
163
- self.prev_gray = None
164
- self.prev_points = None
165
- self.frame_idx = 0
166
- self.speed_estimate = 60 # Reset to initial estimate
167
 
168
  class DrowsinessDetector:
169
  def __init__(self):
170
  self.model = None
171
- self.input_shape = (224, 224, 3) # Updated to match model's expected input shape
 
172
  self.face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
173
  self.id2label = {0: "notdrowsy", 1: "drowsy"}
174
  self.label2id = {"notdrowsy": 0, "drowsy": 1}
175
-
176
- # Speed detector
177
- self.speed_detector = SpeedDetector()
178
- self.SPEED_CHANGE_WEIGHT = 0.15 # Weight for speed changes in drowsiness calculation
179
-
180
- # Yawn detection parameters
181
- self.MAR_THRESHOLD = 0.5 # Mouth aspect ratio threshold for yawn detection
182
- self.yawn_counter = 0
183
- self.CONSECUTIVE_YAWN_FRAMES = 10 # Number of consecutive frames to confirm yawn
184
- self.last_yawn_time = 0
185
- self.YAWN_COOLDOWN = 3 # Seconds between yawn alerts
186
-
187
- # 嘗試動態 import dlib,並設置 fallback
188
- self.landmark_detection_enabled = False
189
- try:
190
- import dlib
191
- self.detector = dlib.get_frontal_face_detector()
192
- predictor_path = "shape_predictor_68_face_landmarks.dat"
193
- if not os.path.exists(predictor_path):
194
- print(f"Warning: {predictor_path} not found. Downloading...")
195
- import urllib.request
196
- urllib.request.urlretrieve(
197
- "https://github.com/italojs/facial-landmarks-recognition/raw/master/shape_predictor_68_face_landmarks.dat",
198
- predictor_path
199
- )
200
- self.predictor = dlib.shape_predictor(predictor_path)
201
- self.landmark_detection_enabled = True
202
- print("Facial landmark detection enabled")
203
- except Exception as e:
204
- print(f"Warning: Facial landmark detection disabled: {e}")
205
- print("The system will use a simpler detection method. For better accuracy, install CMake and dlib.")
206
-
207
- # Constants for drowsiness detection
208
- self.EAR_THRESHOLD = 0.25 # Eye aspect ratio threshold
209
- self.CONSECUTIVE_FRAMES = 20
210
- self.ear_counter = 0
211
- self.GAZE_THRESHOLD = 0.2 # Gaze direction threshold
212
- self.HEAD_POSE_THRESHOLD = 0.3 # Head pose threshold
213
-
214
- # Parameters for weighted ensemble
215
- self.MODEL_WEIGHT = 0.45 # Reduced to accommodate speed factor
216
- self.EAR_WEIGHT = 0.2
217
- self.GAZE_WEIGHT = 0.1
218
- self.HEAD_POSE_WEIGHT = 0.1
219
-
220
- # For tracking across frames
221
- self.prev_drowsy_count = 0
222
- self.drowsy_history = []
223
- self.current_speed = 0 # Current speed in km/h
224
- self.mtcnn_detector = MTCNN()
225
 
226
- def update_speed(self, speed_km_h):
227
- """Update the current speed"""
228
- self.current_speed = speed_km_h
229
- return self.speed_detector.update_speed(speed_km_h)
230
-
231
- def reset_speed_detector(self):
232
- """Reset the speed detector"""
233
- self.speed_detector.reset()
234
-
235
- def load_model(self):
236
- """Load the CNN model from local files"""
237
  try:
238
- # Use local model files
239
- config_path = "huggingface_model/config.json"
240
- model_path = "drowsiness_model.h5"
241
-
242
- # Load config
243
- with open(config_path, 'r') as f:
244
- config = json.load(f)
245
-
246
- # Load the Keras model directly
247
- self.model = keras.models.load_model(model_path)
248
-
249
- # Print model summary for debugging
250
- print("Model loaded successfully")
251
- print(f"Model input shape: {self.model.input_shape}")
252
- self.model.summary()
253
-
254
  except Exception as e:
255
- print(f"Error loading CNN model: {str(e)}")
256
  raise
257
-
258
- def eye_aspect_ratio(self, eye):
259
- """Calculate the eye aspect ratio"""
260
- # Compute the euclidean distances between the two sets of vertical eye landmarks
261
- A = dist.euclidean(eye[1], eye[5])
262
- B = dist.euclidean(eye[2], eye[4])
263
-
264
- # Compute the euclidean distance between the horizontal eye landmarks
265
- C = dist.euclidean(eye[0], eye[3])
266
-
267
- # Calculate the eye aspect ratio
268
- ear = (A + B) / (2.0 * C)
269
- return ear
270
-
271
- def calculate_gaze(self, eye_points, facial_landmarks):
272
- """Calculate gaze direction"""
273
- left_eye_region = np.array([(facial_landmarks.part(i).x, facial_landmarks.part(i).y) for i in range(36, 42)])
274
- right_eye_region = np.array([(facial_landmarks.part(i).x, facial_landmarks.part(i).y) for i in range(42, 48)])
275
-
276
- # Compute eye centers
277
- left_eye_center = left_eye_region.mean(axis=0).astype("int")
278
- right_eye_center = right_eye_region.mean(axis=0).astype("int")
279
-
280
- # Compute the angle between eye centers
281
- dY = right_eye_center[1] - left_eye_center[1]
282
- dX = right_eye_center[0] - left_eye_center[0]
283
- angle = np.degrees(np.arctan2(dY, dX))
284
-
285
- # Normalize the angle
286
- return abs(angle) / 180.0
287
-
288
- def get_head_pose(self, shape):
289
- """Calculate the head pose"""
290
- # Get specific facial landmarks for head pose estimation
291
- image_points = np.array([
292
- (shape.part(30).x, shape.part(30).y), # Nose tip
293
- (shape.part(8).x, shape.part(8).y), # Chin
294
- (shape.part(36).x, shape.part(36).y), # Left eye left corner
295
- (shape.part(45).x, shape.part(45).y), # Right eye right corner
296
- (shape.part(48).x, shape.part(48).y), # Left mouth corner
297
- (shape.part(54).x, shape.part(54).y) # Right mouth corner
298
- ], dtype="double")
299
-
300
- # A simple head pose estimation using the angle of the face
301
- # Calculate center of the face
302
- center_x = np.mean([p[0] for p in image_points])
303
- center_y = np.mean([p[1] for p in image_points])
304
-
305
- # Calculate angle with respect to vertical
306
- angle = 0
307
- if len(image_points) > 2:
308
- point1 = image_points[0] # Nose
309
- point2 = image_points[1] # Chin
310
- angle = abs(math.atan2(point2[1] - point1[1], point2[0] - point1[0]))
311
-
312
- # Normalize to 0-1 range where 0 is upright and 1 is drooping
313
- normalized_pose = min(1.0, abs(angle) / (math.pi/2))
314
- return normalized_pose
315
-
316
- def enhance_image(self, frame):
317
- # Apply CLAHE to improve contrast
318
- gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
319
- clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
320
- enhanced = clahe.apply(gray)
321
- enhanced_bgr = cv2.cvtColor(enhanced, cv2.COLOR_GRAY2BGR)
322
- return enhanced_bgr
323
 
324
  def detect_face(self, frame):
325
- # Enhance image before detection
326
- enhanced_frame = self.enhance_image(frame)
327
- # Try MTCNN
328
- try:
329
- results = self.mtcnn_detector.detect_faces(cv2.cvtColor(enhanced_frame, cv2.COLOR_BGR2RGB))
330
- print('MTCNN results:', results)
331
- if results:
332
- # 選擇最右側的臉(x+w最大者)
333
- rightmost = max(results, key=lambda r: r['box'][0] + r['box'][2])
334
- x, y, w, h = rightmost['box']
335
- x, y = max(0, x), max(0, y)
336
- w, h = max(0, w), max(0, h)
337
- if x+w > frame.shape[1] or y+h > frame.shape[0] or w == 0 or h == 0:
338
- print('MTCNN box out of bounds or zero size')
339
- else:
340
- face = frame[y:y+h, x:x+w]
341
- return face, (x, y, w, h)
342
- except Exception as e:
343
- print(f"MTCNN detection error: {e}")
344
- # Fallback to haarcascade
345
- gray = cv2.cvtColor(enhanced_frame, cv2.COLOR_BGR2GRAY)
346
  faces = self.face_cascade.detectMultiScale(gray, 1.1, 4)
347
- print('Haar results:', faces)
348
  if len(faces) > 0:
349
- # 選擇最右側的臉
350
- rightmost_idx = np.argmax([x+w for (x, y, w, h) in faces])
351
- (x, y, w, h) = faces[rightmost_idx]
352
- if w > 0 and h > 0:
353
- face = frame[y:y+h, x:x+w]
354
- return face, (x, y, w, h)
355
  return None, None
356
 
357
  def preprocess_image(self, image):
358
- """Preprocess the input image for CNN"""
359
  if image is None:
360
  return None
361
- # Convert to RGB
362
- image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
363
- # Resize to model input size (224x224)
364
- image = cv2.resize(image, (self.input_shape[0], self.input_shape[1]))
365
- # Normalize
366
- image = image.astype(np.float32) / 255.0
367
- # Add batch dimension
368
- image = np.expand_dims(image, axis=0)
369
- return image
370
-
371
- def mouth_aspect_ratio(self, mouth_points):
372
- """Calculate the mouth aspect ratio"""
373
- # Compute the euclidean distances between the vertical mouth landmarks
374
- A = np.linalg.norm(mouth_points[1] - mouth_points[7])
375
- B = np.linalg.norm(mouth_points[2] - mouth_points[6])
376
- C = np.linalg.norm(mouth_points[3] - mouth_points[5])
377
-
378
- # Compute the euclidean distance between the horizontal mouth landmarks
379
- D = np.linalg.norm(mouth_points[0] - mouth_points[4])
380
-
381
- # Calculate the mouth aspect ratio
382
- mar = (A + B + C) / (2.0 * D)
383
- return mar
384
-
385
- def detect_yawn(self, shape):
386
- """Detect if the person is yawning using mouth aspect ratio"""
387
- if not self.landmark_detection_enabled:
388
- return False, 0
389
-
390
- # Get mouth landmarks (points 48-68)
391
- mouth_points = np.array([(shape.part(i).x, shape.part(i).y) for i in range(48, 68)])
392
-
393
- # Calculate mouth aspect ratio
394
- mar = self.mouth_aspect_ratio(mouth_points)
395
-
396
- # Check if mouth is open wide enough to be considered a yawn
397
- current_time = time.time()
398
- if mar > self.MAR_THRESHOLD:
399
- self.yawn_counter += 1
400
- if self.yawn_counter >= self.CONSECUTIVE_YAWN_FRAMES:
401
- # Check if enough time has passed since last yawn alert
402
- if current_time - self.last_yawn_time > self.YAWN_COOLDOWN:
403
- self.last_yawn_time = current_time
404
- return True, mar
405
- else:
406
- self.yawn_counter = 0
407
-
408
- return False, mar
409
 
410
  def predict(self, image):
411
- """Predict drowsiness using multiple features"""
412
- try:
413
- # Convert image to numpy array if it's not already
414
- if isinstance(image, Image.Image):
415
- image = np.array(image)
416
-
417
- # Convert to RGB if image is in BGR format
418
- if len(image.shape) == 3 and image.shape[2] == 3:
419
- if image.dtype == np.uint8:
420
- image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
421
-
422
- # Detect face
423
- face, face_coords = self.detect_face(image)
424
- if face is None or face_coords is None:
425
- return 0, "No face detected", None, 0, 0, 0, 0, 0, False
426
-
427
- # Initialize feature scores
428
- model_score = 0
429
- ear_score = 0
430
- gaze_score = 0
431
- head_pose_score = 0
432
- yawn_detected = False
433
- mar = 0
434
-
435
- # Get facial landmarks if available
436
- if self.landmark_detection_enabled:
437
- gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
438
- rect = dlib.rectangle(face_coords[0], face_coords[1], face_coords[0] + face_coords[2], face_coords[1] + face_coords[3])
439
- shape = self.predictor(gray, rect)
440
- # Calculate EAR
441
- left_eye = np.array([(shape.part(i).x, shape.part(i).y) for i in range(36, 42)])
442
- right_eye = np.array([(shape.part(i).x, shape.part(i).y) for i in range(42, 48)])
443
- ear = (self.eye_aspect_ratio(left_eye) + self.eye_aspect_ratio(right_eye)) / 2.0
444
- ear_score = 1.0 if ear < self.EAR_THRESHOLD else 0.0
445
- # Calculate gaze direction
446
- gaze = self.calculate_gaze([left_eye, right_eye], shape)
447
- gaze_score = 1.0 if abs(gaze[0]) > self.GAZE_THRESHOLD or abs(gaze[1]) > self.GAZE_THRESHOLD else 0.0
448
- # Calculate head pose
449
- head_pose = self.get_head_pose(shape)
450
- head_pose_score = 1.0 if abs(head_pose[0]) > self.HEAD_POSE_THRESHOLD or abs(head_pose[1]) > self.HEAD_POSE_THRESHOLD else 0.0
451
- # Detect yawn
452
- yawn_detected, mar = self.detect_yawn(shape)
453
- else:
454
- # Fallback: simple EAR/MAR estimation using grayscale intensity
455
- # Estimate eye region based on face proportions
456
- face_gray = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
457
- fh, fw = face_gray.shape[:2]
458
- # Approximate left/right eye regions
459
- left_eye_region = face_gray[int(fh*0.25):int(fh*0.45), int(fw*0.13):int(fw*0.37)]
460
- right_eye_region = face_gray[int(fh*0.25):int(fh*0.45), int(fw*0.63):int(fw*0.87)]
461
- # Use average intensity: lower means more likely closed
462
- if left_eye_region.size > 0 and right_eye_region.size > 0:
463
- left_eye_avg = np.mean(left_eye_region) / 255.0
464
- right_eye_avg = np.mean(right_eye_region) / 255.0
465
- # Invert so that darker regions (potentially closed eyes) have higher values
466
- left_eye_closed = 1.0 - left_eye_avg
467
- right_eye_closed = 1.0 - right_eye_avg
468
- # Combine into a simple eye closure metric (0-1 range, higher means more closed)
469
- eye_closure = (left_eye_closed + right_eye_closed) / 2.0
470
- # Convert to a rough approximation of EAR
471
- estimated_ear = max(0.15, 0.4 - (eye_closure * 0.25))
472
- ear_score = 1.0 if estimated_ear < self.EAR_THRESHOLD else 0.0
473
- # Fallback MAR: use mouth region intensity
474
- mouth_region = face_gray[int(fh*0.65):int(fh*0.90), int(fw*0.25):int(fw*0.75)]
475
- if mouth_region.size > 0:
476
- mar = np.mean(mouth_region) / 255.0
477
- yawn_detected = False # fallback下不判斷yawn,避免誤判
478
- # Get CNN model prediction
479
- processed_image = self.preprocess_image(face)
480
- if self.model is not None:
481
- model_pred = self.model.predict(processed_image, verbose=0)
482
- if len(model_pred.shape) == 2:
483
- if model_pred.shape[1] == 1:
484
- model_score = float(model_pred[0][0])
485
- else:
486
- model_score = float(model_pred[0][1])
487
- else:
488
- model_score = float(model_pred[0])
489
- # Calculate weighted ensemble score
490
- ensemble_score = (
491
- self.MODEL_WEIGHT * model_score +
492
- self.EAR_WEIGHT * ear_score +
493
- self.GAZE_WEIGHT * gaze_score +
494
- self.HEAD_POSE_WEIGHT * head_pose_score
495
- )
496
- # Add speed factor if available
497
- if self.current_speed > 0:
498
- speed_score = self.speed_detector.get_speed_change_score()
499
- ensemble_score = (1 - self.SPEED_CHANGE_WEIGHT) * ensemble_score + self.SPEED_CHANGE_WEIGHT * speed_score
500
- # Update drowsy history
501
- self.drowsy_history.append(ensemble_score)
502
- if len(self.drowsy_history) > 30: # Keep last 30 frames
503
- self.drowsy_history.pop(0)
504
- # Calculate average drowsiness over recent frames
505
- avg_drowsiness = np.mean(self.drowsy_history) if self.drowsy_history else 0
506
- # Determine final drowsiness state
507
- is_drowsy = avg_drowsiness > 0.5
508
- # Debug output
509
- print(f"[DEBUG] Model score: {model_score:.2f}, EAR: {ear_score:.2f}, MAR: {mar:.2f}, Drowsy: {is_drowsy}, Yawn: {yawn_detected}")
510
- # 強化EAR判斷:若模型分數高但EAR也高,強制標註為Alert
511
- if metrics['model_prob'] > 0.7 and metrics['ear'] > 0.25:
512
- is_drowsy = False
513
- alert_level = "Alert"
514
- color = (0, 255, 0)
515
- elif avg_drowsiness > 0.5:
516
- alert_level = "Drowsy"
517
- color = (0, 0, 255)
518
- else:
519
- alert_level = "Not Drowsy"
520
- color = (0, 255, 0)
521
- return (
522
- ensemble_score,
523
- alert_level,
524
- face_coords,
525
- ear_score,
526
- gaze_score,
527
- head_pose_score,
528
- model_score,
529
- mar,
530
- yawn_detected
531
- )
532
- except Exception as e:
533
- print(f"Error in predict: {str(e)}")
534
- return 0, "Error in prediction", None, 0, 0, 0, 0, 0, False
535
 
536
- # Create a global instance
537
  detector = DrowsinessDetector()
538
 
539
- def process_image(image):
540
- """Process image input"""
541
- if image is None:
542
- return None, "No image provided"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
543
  try:
544
- if image.size == 0 or image.shape[0] == 0 or image.shape[1] == 0:
545
- return None, "Invalid image dimensions"
546
- processed_image = image.copy()
547
- result = detector.predict(processed_image)
548
- if len(result) == 9:
549
- drowsy_prob, status, face_coords, ear_score, gaze_score, head_pose_score, model_score, mar, yawn_detected = result
550
- metrics = {
551
- 'model_prob': model_score,
552
- 'ear': ear_score,
553
- 'gaze': gaze_score,
554
- 'head_pose': head_pose_score,
555
- 'mar': mar,
556
- 'yawn_detected': yawn_detected
557
- }
558
- error = None
559
- elif len(result) == 4:
560
- drowsy_prob, face_coords, error, metrics = result
561
- elif len(result) == 2:
562
- return result
563
- else:
564
- return None, "Unknown error in prediction"
565
- if error:
566
- return None, error
567
- if face_coords is None:
568
- cv2.putText(processed_image, "Face detection error", (30, 30),
569
- cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 0, 255), 2)
570
- return processed_image, "Face detection error"
571
- if not (isinstance(face_coords, (tuple, list)) and len(face_coords) == 4):
572
- cv2.putText(processed_image, "Face detection error", (30, 60),
573
- cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 0, 255), 2)
574
- return processed_image, "Face detection error: invalid coordinates"
575
- x, y, w, h = face_coords
576
- # 強化EAR判斷:若模型分數高但EAR也高,強制標註為Alert
577
- is_drowsy = drowsy_prob >= 0.7
578
- if metrics['model_prob'] > 0.7 and metrics['ear'] > 0.25:
579
- is_drowsy = False
580
- alert_level = "Alert"
581
- color = (0, 255, 0)
582
- elif drowsy_prob >= 0.85:
583
- alert_level = "High Risk"
584
- color = (0, 0, 255)
585
- elif drowsy_prob >= 0.7:
586
- alert_level = "Medium Risk"
587
- color = (0, 165, 255)
588
- else:
589
- alert_level = "Alert"
590
- color = (0, 255, 0)
591
- cv2.rectangle(processed_image, (x, y), (x+w, y+h), color, 2)
592
- y_offset = 25
593
- cv2.putText(processed_image, f"{'Drowsy' if is_drowsy else 'Alert'} ({drowsy_prob:.2f})",
594
- (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)
595
- cv2.putText(processed_image, alert_level, (x, y-35),
596
- cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)
597
- cv2.putText(processed_image, f"Model: {metrics['model_prob']:.2f}", (10, processed_image.shape[0]-10-y_offset*3),
598
- cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
599
- cv2.putText(processed_image, f"Eye Ratio: {metrics['ear']:.2f}", (10, processed_image.shape[0]-10-y_offset*2),
600
- cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
601
- cv2.putText(processed_image, f"Head Pose: {metrics['head_pose']:.2f}", (10, processed_image.shape[0]-10-y_offset),
602
- cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
603
- if 'mar' in metrics:
604
- cv2.putText(processed_image, f"MAR: {metrics['mar']:.2f}", (10, processed_image.shape[0]-10-y_offset*4),
605
- cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
606
- if metrics.get('yawn_detected'):
607
- cv2.putText(processed_image, "YAWN DETECTED!", (x, y-60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
608
- if metrics['model_prob'] > 0.9 and metrics['ear'] > 0.25:
609
- cv2.putText(processed_image, "Model conflict - verify manually",
610
- (10, processed_image.shape[0]-10-y_offset*5),
611
- cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 165, 255), 1)
612
- return processed_image, f"Processed successfully. Drowsiness: {drowsy_prob:.2f}, Alert level: {alert_level}"
613
  except Exception as e:
614
- import traceback
615
- error_details = traceback.format_exc()
616
- print(f"Error processing image: {str(e)}\n{error_details}")
617
- return None, f"Error processing image: {str(e)}"
618
-
619
- def annotate_no_face(frame, head_moving=False):
620
- annotated = frame.copy()
621
- msg = "未偵測到臉部,請調整姿勢"
622
- color = (0, 0, 255)
623
- if head_moving:
624
- msg = "頭部晃動,請注意安全"
625
- color = (0, 165, 255)
626
- cv2.putText(annotated, msg, (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)
627
- return annotated
628
 
629
- def process_video(video, initial_speed=60):
630
- """Process video input"""
631
- if video is None:
632
- return None, "No video provided"
633
-
634
  try:
635
- # 创建内存缓冲区而不是临时文件
636
- temp_input = None
 
 
 
637
 
638
- # Handle video input (can be file path or video data)
639
- if isinstance(video, str):
640
- print(f"Processing video from path: {video}")
641
- # 直接读取原始文件,不复制到临时目录
642
- cap = cv2.VideoCapture(video)
643
- else:
644
- print(f"Processing video from uploaded data")
645
- # 读取上传的视频数据到内存
646
- import tempfile
647
- temp_input = tempfile.NamedTemporaryFile(suffix='.avi', delete=False)
648
- temp_input_path = temp_input.name
649
- with open(temp_input_path, "wb") as f:
650
- f.write(video)
651
- cap = cv2.VideoCapture(temp_input_path)
652
 
653
- if not cap.isOpened():
654
- return None, "Error: Could not open video"
 
 
 
 
655
 
 
 
 
 
 
 
 
 
 
 
656
  # Get input video properties
 
657
  fps = cap.get(cv2.CAP_PROP_FPS)
658
- if fps <= 0:
659
- fps = 30 # Default to 30fps if invalid
660
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
661
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
662
- total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
663
-
664
- print(f"Video properties: {width}x{height} at {fps}fps, total frames: {total_frames}")
665
-
666
- # 使用临时文件来存储处理后的视频(处理完毕后会删除)
667
- import tempfile
668
- temp_output = tempfile.NamedTemporaryFile(suffix='.avi', delete=False)
669
- temp_output_path = temp_output.name
670
 
671
- # 使用XVID编码并输出为AVI格式
672
- fourcc = cv2.VideoWriter_fourcc(*'XVID')
673
- out = cv2.VideoWriter(temp_output_path, fourcc, fps, (width, height))
674
- if not out.isOpened():
675
- return None, "Error: Could not create output video file"
676
-
677
- # Reset speed detector at the start of each video
678
- detector.reset_speed_detector()
679
-
680
- # Initialize speed value with the provided initial speed
681
- current_speed = initial_speed
682
- detector.speed_detector.speed_estimate = initial_speed
683
-
684
- # Process each frame
685
- frame_count = 0
686
- processed_count = 0
687
- face_detected_count = 0
688
- drowsy_count = 0
689
- high_risk_count = 0
690
- ear_sum = 0
691
- model_prob_sum = 0
692
- yawn_count = 0
693
-
694
- # Calculate frames to skip for 2 FPS processing
695
- frames_to_skip = max(1, int(fps / 2))
696
- print(f"Processing at 2 FPS: skipping {frames_to_skip-1} frames between processed frames")
697
 
698
  while True:
699
  ret, frame = cap.read()
700
  if not ret:
701
- print(f"End of video or error reading frame at frame {frame_count}")
702
  break
703
-
704
- frame_count += 1
705
-
706
- # Skip frames to maintain 2 FPS processing
707
- if frame_count % frames_to_skip != 0:
708
- # 仍然要標註狀態,不能直接複製原圖
709
- # 嘗試用光流判斷頭部是否晃動
710
- head_moving = False
711
- try:
712
- # 使用SpeedDetector的optical flow估算頭部移動
713
- # 這裡只用flow magnitude判斷
714
- gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
715
- if detector.speed_detector.prev_gray is not None:
716
- flow = cv2.absdiff(gray, detector.speed_detector.prev_gray)
717
- mean_flow = np.mean(flow)
718
- head_moving = mean_flow > 8 # 閾值可調
719
- detector.speed_detector.prev_gray = gray.copy()
720
- except Exception as e:
721
- pass
722
- annotated = annotate_no_face(frame, head_moving=head_moving)
723
- out.write(annotated)
724
- continue
725
-
726
- # Detect speed from the current frame
727
- current_speed = detector.speed_detector.detect_speed_from_frame(frame)
728
-
729
- try:
730
- # Try to process the frame
731
- processed_frame, message = process_image(frame)
732
 
733
- # Add speed info to the frame
734
- if processed_frame is not None:
735
- speed_text = f"Speed: {current_speed:.1f} km/h"
736
- cv2.putText(processed_frame, speed_text, (10, processed_frame.shape[0]-45),
737
- cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
738
-
739
- # Add speed change score
740
- speed_change_score = detector.speed_detector.get_speed_change_score()
741
- cv2.putText(processed_frame, f"Speed Variation: {speed_change_score:.2f}",
742
- (10, processed_frame.shape[0]-70),
743
- cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
744
-
745
- # 確保每一幀尺寸正確
746
- if processed_frame is not None:
747
- if processed_frame.shape[1] != width or processed_frame.shape[0] != height:
748
- processed_frame = cv2.resize(processed_frame, (width, height))
749
- # 若無臉,則標註未偵測到臉或頭部晃動
750
- if "Face detection error" in message or "No face detected" in message or (isinstance(processed_frame, np.ndarray) and np.all(processed_frame == frame)):
751
- # 嘗試用光流判斷頭部是否晃動
752
- head_moving = False
753
- try:
754
- gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
755
- if detector.speed_detector.prev_gray is not None:
756
- flow = cv2.absdiff(gray, detector.speed_detector.prev_gray)
757
- mean_flow = np.mean(flow)
758
- head_moving = mean_flow > 8
759
- detector.speed_detector.prev_gray = gray.copy()
760
- except Exception as e:
761
- pass
762
- processed_frame = annotate_no_face(frame, head_moving=head_moving)
763
- out.write(processed_frame)
764
- processed_count += 1
765
- if "No face detected" not in message:
766
- face_detected_count += 1
767
- if "Drowsiness" in message:
768
- # Extract drowsiness probability
769
- try:
770
- drowsy_text = message.split("Drowsiness: ")[1].split(",")[0]
771
- drowsy_prob = float(drowsy_text)
772
-
773
- # Track drowsiness stats
774
- if drowsy_prob >= 0.7:
775
- drowsy_count += 1
776
- if drowsy_prob >= 0.85:
777
- high_risk_count += 1
778
- # Get metrics from the frame
779
- result = detector.predict(frame)
780
- if len(result) == 9:
781
- _, _, _, ear_score, _, _, model_score, _, yawn_detected = result
782
- ear_sum += ear_score
783
- model_prob_sum += model_score
784
- if yawn_detected:
785
- yawn_count += 1
786
- elif len(result) == 4:
787
- _, _, _, metrics = result
788
- if 'ear' in metrics:
789
- ear_sum += metrics['ear']
790
- if 'model_prob' in metrics:
791
- model_prob_sum += metrics['model_prob']
792
- if 'yawn_detected' in metrics and metrics['yawn_detected']:
793
- yawn_count += 1
794
- except:
795
- pass
796
- else:
797
- # Fallback: If processing fails, just用annotate_no_face標註
798
- head_moving = False
799
- try:
800
- gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
801
- if detector.speed_detector.prev_gray is not None:
802
- flow = cv2.absdiff(gray, detector.speed_detector.prev_gray)
803
- mean_flow = np.mean(flow)
804
- head_moving = mean_flow > 8
805
- detector.speed_detector.prev_gray = gray.copy()
806
- except Exception as e:
807
- pass
808
- processed_frame = annotate_no_face(frame, head_moving=head_moving)
809
- out.write(processed_frame)
810
- processed_count += 1
811
- print(f"Frame {frame_count}: Processing failed - {message}")
812
- except Exception as e:
813
- # If any error occurs during processing, use original frame
814
- cv2.putText(frame, f"Error: {str(e)[:30]}", (30, 30),
815
- cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
816
- out.write(frame)
817
- processed_count += 1
818
- print(f"Frame {frame_count}: Exception - {str(e)}")
819
-
820
- # Print progress for every 10th frame
821
- if frame_count % 10 == 0:
822
- print(f"Processed {frame_count}/{total_frames} frames")
823
 
824
  # Release resources
825
  cap.release()
826
  out.release()
827
 
828
- # Calculate statistics
829
- drowsy_percentage = (drowsy_count / face_detected_count * 100) if face_detected_count > 0 else 0
830
- high_risk_percentage = (high_risk_count / face_detected_count * 100) if face_detected_count > 0 else 0
831
- avg_ear = ear_sum / face_detected_count if face_detected_count > 0 else 0
832
- avg_model_prob = model_prob_sum / face_detected_count if face_detected_count > 0 else 0
833
- speed_score = detector.speed_detector.get_speed_change_score()
834
- yawn_percentage = (yawn_count / face_detected_count * 100) if face_detected_count > 0 else 0
835
-
836
- # Check if video was created successfully and return it directly
837
- if os.path.exists(temp_output_path) and os.path.getsize(temp_output_path) > 0:
838
- print(f"Video processed successfully with {processed_count} frames")
839
- print(f"Drowsy frames: {drowsy_count} ({drowsy_percentage:.1f}%), High risk frames: {high_risk_count} ({high_risk_percentage:.1f}%)")
840
- print(f"Average eye ratio: {avg_ear:.2f}, Average model probability: {avg_model_prob:.2f}")
841
- print(f"Speed change score: {speed_score:.2f}")
842
- print(f"Yawn frames: {yawn_count} ({yawn_percentage:.1f}%)")
843
-
844
- false_positive_warning = ""
845
- if avg_model_prob > 0.8 and avg_ear > 0.25:
846
- false_positive_warning = " ⚠️ Possible false positive (eyes open but model detects drowsiness)"
847
-
848
- result_message = (f"Video processed successfully. Frames: {frame_count}, faces detected: {face_detected_count}, "
849
- f"drowsy: {drowsy_count} ({drowsy_percentage:.1f}%), high risk: {high_risk_count} ({high_risk_percentage:.1f}%), "
850
- f"yawn: {yawn_count} ({yawn_percentage:.1f}%). "
851
- f"Avg eye ratio: {avg_ear:.2f}, Speed score: {speed_score:.2f}{false_positive_warning}")
852
-
853
- video_result = temp_output_path
854
-
855
- return video_result, result_message
856
  else:
857
- print(f"Failed to create output video. Frames read: {frame_count}, processed: {processed_count}")
858
- return None, f"Error: Failed to create output video. Frames read: {frame_count}, processed: {processed_count}"
859
 
860
  except Exception as e:
861
- import traceback
862
- error_details = traceback.format_exc()
863
- print(f"Error processing video: {str(e)}\n{error_details}")
864
- return None, f"Error processing video: {str(e)}"
865
  finally:
866
- if 'out' in locals() and out is not None:
 
867
  out.release()
868
- if 'cap' in locals() and cap is not None:
869
  cap.release()
870
- if temp_input is not None:
871
- try:
872
- os.unlink(temp_input.name)
873
- except:
874
- pass
875
 
876
- def process_webcam(image):
877
- """Process webcam input"""
878
  try:
879
- # Convert image to numpy array if it's not already
880
- if isinstance(image, Image.Image):
881
- image = np.array(image)
882
-
883
- # Convert to RGB if image is in BGR format
884
- if len(image.shape) == 3 and image.shape[2] == 3:
885
- if image.dtype == np.uint8:
886
- image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
887
-
888
- # Get prediction
889
- drowsy_prob, status, face_coords, ear_score, gaze_score, head_pose_score, model_score, mar, yawn_detected = detector.predict(image)
890
-
891
- # Draw results on image
892
- if face_coords is not None:
893
- x, y, w, h = face_coords
894
- # Draw face rectangle
895
- cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
896
-
897
- # Add status text
898
- status_color = (0, 0, 255) if status == "Drowsy" else (0, 255, 0)
899
- cv2.putText(image, f"Status: {status}", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, status_color, 2)
900
-
901
- # Add yawn detection text if yawn is detected
902
- if yawn_detected:
903
- cv2.putText(image, "YAWN DETECTED!", (x, y - 40), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
904
- # Play alert sound
905
- try:
906
- import winsound
907
- winsound.Beep(1000, 500) # Frequency: 1000Hz, Duration: 500ms
908
- except:
909
- print("Beep!")
910
-
911
- # Add metrics
912
- cv2.putText(image, f"EAR: {ear_score:.2f}", (x, y + h + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
913
- cv2.putText(image, f"Gaze: {gaze_score:.2f}", (x, y + h + 40), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
914
- cv2.putText(image, f"Head: {head_pose_score:.2f}", (x, y + h + 60), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
915
- cv2.putText(image, f"MAR: {mar:.2f}", (x, y + h + 80), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
916
-
917
- return image
918
-
919
  except Exception as e:
920
- print(f"Error in process_webcam: {str(e)}")
921
- return image
 
 
922
 
923
- # Launch the app
924
- if __name__ == "__main__":
925
- # Parse command line arguments
926
- parser = argparse.ArgumentParser(description="Driver Drowsiness Detection App")
927
- parser.add_argument("--share", action="store_true", help="Create a public link (may trigger security warnings)")
928
- parser.add_argument("--port", type=int, default=7860, help="Port to run the app on")
929
- args = parser.parse_args()
930
-
931
- # Print warning if share is enabled
932
- if args.share:
933
- print("WARNING: Running with --share may trigger security warnings on some systems.")
934
- print("The app will be accessible from the internet through a temporary URL.")
935
-
936
- # 注册退出时的清理函数
937
- import atexit
938
- import glob
939
- import shutil
940
 
941
- def cleanup_temp_files():
942
- """Clean up all temporary files"""
943
- try:
944
- # 删除所有可能留下的临时文件
945
- import tempfile
946
- temp_dir = tempfile.gettempdir()
947
- pattern = os.path.join(temp_dir, "tmp*")
948
- for file in glob.glob(pattern):
949
- try:
950
- if os.path.isfile(file):
951
- os.remove(file)
952
- except Exception as e:
953
- print(f"Failed to delete {file}: {e}")
954
-
955
- # 确保没有留下.mp4或.avi文件
956
- for ext in [".mp4", ".avi"]:
957
- pattern = os.path.join(temp_dir, f"*{ext}")
958
- for file in glob.glob(pattern):
959
- try:
960
- os.remove(file)
961
- except Exception as e:
962
- print(f"Failed to delete {file}: {e}")
963
-
964
- print("Cleaned up temporary files")
965
- except Exception as e:
966
- print(f"Error during cleanup: {e}")
967
 
968
- # 注册清理函数
969
- atexit.register(cleanup_temp_files)
 
 
 
 
970
 
971
- # Load the model at startup
972
- detector.load_model()
973
-
974
- # Create interface
975
- with gr.Blocks(title="Driver Drowsiness Detection") as demo:
976
- gr.Markdown("""
977
- # 🚗 Driver Drowsiness Detection System
978
-
979
- This system detects driver drowsiness using computer vision and deep learning.
980
-
981
- ## Features:
982
- - Image analysis
983
- - Video processing with speed monitoring
984
- - Webcam detection (PC and mobile)
985
- - Multi-factor drowsiness prediction (face, eyes, head pose, speed changes)
986
- """)
987
-
988
- with gr.Tabs():
989
- with gr.Tab("Image"):
990
- gr.Markdown("Upload an image for drowsiness detection")
991
- with gr.Row():
992
- image_input = gr.Image(label="Input Image", type="numpy")
993
- image_output = gr.Image(label="Processed Image")
994
- with gr.Row():
995
- status_output = gr.Textbox(label="Status")
996
- image_input.change(
997
- fn=process_image,
998
- inputs=[image_input],
999
- outputs=[image_output, status_output]
1000
- )
1001
-
1002
- with gr.Tab("Video"):
1003
- gr.Markdown("""
1004
- ### ### Upload driving videos for sleepy detection
1005
-
1006
- The system will automatically detect the following content from the video:
1007
- - Driver's facial expressions and eye status
1008
- - Vehicle speed changes (by optical flow analysis in video)
1009
- - When the vehicle speed changes more than ±5 km/h, it will be considered abnormal driving behavior
1010
-
1011
- ** Note: ** The processed videos will not be saved to the local folder.
1012
- Please use the download button in the upper right corner of the interface to save the results.
1013
- """)
1014
- with gr.Row():
1015
- video_input = gr.Video(label="Enter video")
1016
- video_output = gr.Video(label="Processed video (Click on the upper right corner to download)")
1017
- with gr.Row():
1018
- initial_speed = gr.Slider(minimum=10, maximum=120, value=60, label="Initial speed estimate (km/h)",
1019
- info="As initial estimate only, The system will automatically detect the actual speed changes from the video")
1020
- with gr.Row():
1021
- video_status = gr.Textbox(label="Processing status")
1022
- with gr.Row():
1023
- process_btn = gr.Button("Processing videos")
1024
- clear_btn = gr.Button("Clear")
1025
-
1026
- process_btn.click(
1027
- fn=process_video,
1028
- inputs=[video_input, initial_speed],
1029
- outputs=[video_output, video_status]
1030
- )
1031
-
1032
- clear_btn.click(
1033
- fn=lambda: (None, "Cleared results"),
1034
- inputs=[],
1035
- outputs=[video_output, video_status]
1036
- )
1037
-
1038
- with gr.Tab("Webcam"):
1039
- gr.Markdown("Use your webcam or mobile camera for real-time drowsiness detection")
1040
- with gr.Row():
1041
- webcam_input = gr.Image(label="Camera Feed", type="numpy", streaming=True)
1042
- webcam_output = gr.Image(label="Processed Feed")
1043
- with gr.Row():
1044
- speed_input = gr.Slider(minimum=0, maximum=150, value=60, label="Current Speed (km/h)")
1045
- update_speed_btn = gr.Button("Update Speed")
1046
- with gr.Row():
1047
- webcam_status = gr.Textbox(label="Status")
1048
-
1049
- def process_webcam_with_speed(image, speed):
1050
- detector.update_speed(speed)
1051
- return process_webcam(image)
1052
-
1053
- update_speed_btn.click(
1054
- fn=lambda speed: f"Speed updated to {speed} km/h",
1055
- inputs=[speed_input],
1056
- outputs=[webcam_status]
1057
- )
1058
-
1059
- webcam_input.change(
1060
- fn=process_webcam_with_speed,
1061
- inputs=[webcam_input, speed_input],
1062
- outputs=[webcam_output, webcam_status]
1063
- )
1064
-
1065
- gr.Markdown("""
1066
- ## How It Works
1067
- This system detects drowsiness using multiple factors:
1068
- 1. **Facial features** - Using a trained CNN model
1069
- 2. **Eye openness** - Measuring eye aspect ratio (EAR)
1070
- 3. **Head position** - Detecting head drooping
1071
- 4. **Automatic speed detection** - Using optical flow analysis to track vehicle movement and detect irregular speed changes
1072
-
1073
- The system automatically detects speed changes from the video frames using computer vision techniques:
1074
- - **Optical flow** is used to track movement between frames
1075
- - **Irregular speed changes** (±5 km/h) are detected as potential signs of drowsy driving
1076
- - **No external speed data required** - everything is analyzed directly from the video content
1077
-
1078
- Combining these factors provides more reliable drowsiness detection than using facial features alone.
1079
- """)
1080
 
1081
- # Launch the app
1082
- demo.launch(share=args.share, server_port=args.port)
 
1
  import gradio as gr
2
+ import torch
3
+ from transformers import ViTForImageClassification, ViTImageProcessor
4
  import numpy as np
5
  import cv2
6
  from PIL import Image
7
  import io
8
  import os
9
+ import sys
10
  import time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
class DrowsinessDetector:
    """Detects driver drowsiness in single frames with a ViT classifier.

    Pipeline: Haar-cascade face detection -> crop to the face -> ViT image
    classification into {notdrowsy, drowsy}.
    """

    def __init__(self):
        # Model and processor are populated lazily by load_model().
        self.model = None
        self.processor = None
        # Expected input size of the ViT backbone (H, W, C); kept for callers.
        self.input_shape = (224, 224, 3)
        self.face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
        self.id2label = {0: "notdrowsy", 1: "drowsy"}
        self.label2id = {"notdrowsy": 0, "drowsy": 1}

    def load_model(self, model_path):
        """Load the ViT model and processor from the specified path or directory.

        Args:
            model_path: Hugging Face model id, directory, or checkpoint path.

        Raises:
            Exception: re-raises whatever `from_pretrained` fails with, after
                logging it, so startup code can abort cleanly.
        """
        try:
            self.model = ViTForImageClassification.from_pretrained(
                model_path,  # a local directory path is accepted directly
                num_labels=2,
                id2label=self.id2label,
                label2id=self.label2id,
                ignore_mismatched_sizes=True
            )
            self.model.eval()
            # Processor (resize/normalize) always comes from the base ViT repo.
            self.processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224")
            print(f"ViT model loaded successfully from {model_path}")
        except Exception as e:
            print(f"Error loading ViT model: {str(e)}")
            raise

    def detect_face(self, frame):
        """Detect a face in the frame.

        Returns:
            (face_crop, (x, y, w, h)) for the first detection, or (None, None)
            when no face is found.
        """
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = self.face_cascade.detectMultiScale(gray, 1.1, 4)
        if len(faces) > 0:
            (x, y, w, h) = faces[0]  # Get the first face
            face = frame[y:y+h, x:x+w]
            return face, (x, y, w, h)
        return None, None

    def preprocess_image(self, image):
        """Preprocess the input image (assumed BGR) into ViT tensors.

        Returns None when *image* is None; otherwise the processor's
        `BatchFeature` with a `pixel_values` tensor.
        """
        if image is None:
            return None
        pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        inputs = self.processor(images=pil_img, return_tensors="pt")
        return inputs

    def predict(self, image):
        """Run drowsiness prediction on one frame.

        Returns:
            (drowsy_prob, face_coords, error): probability of the "drowsy"
            class plus face box on success, or (None, None, message) when
            no face is found or preprocessing fails.

        Raises:
            ValueError: if load_model() has not been called yet.
        """
        if self.model is None or self.processor is None:
            raise ValueError("Model not loaded. Call load_model() first.")
        # Detect face
        face, face_coords = self.detect_face(image)
        if face is None:
            return None, None, "No face detected"
        # Preprocess the face image
        inputs = self.preprocess_image(face)
        if inputs is None:
            return None, None, "Error processing image"
        # Make prediction
        with torch.no_grad():
            outputs = self.model(**inputs)
            probs = torch.softmax(outputs.logits, dim=1)
        # Only the probability of class 1 ("drowsy") is used by callers;
        # the original also computed argmax/label locals that were dead code.
        drowsy_prob = probs[0, 1].item()
        return drowsy_prob, face_coords, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
# Initialize detector
# Module-level singleton shared by all Gradio handlers; its model/processor
# are populated later by the module-level load_model() call at startup.
detector = DrowsinessDetector()
82
 
83
def find_model_file(search_paths=None):
    """Find the model directory or file in common locations.

    Args:
        search_paths: Optional iterable of candidate paths to probe in order.
            Defaults to the app's well-known locations (the full Hugging Face
            model directory first, then legacy weight files).

    Returns:
        The first path that exists on disk, or None if none do.
    """
    if search_paths is None:
        search_paths = [
            "huggingface_model",  # preferred: full HF model directory
            "pytorch_model.bin",
            "model_weights.h5",
            "drowsiness_model.h5",
            "model/drowsiness_model.h5",
            "models/drowsiness_model.h5",
            "huggingface_model/model_weights.h5",
            "huggingface_model/drowsiness_model.h5",
            "../model_weights.h5",
            "../drowsiness_model.h5",
        ]
    # os.path.exists is True for both files and directories, matching the
    # mixed directory/file candidates above.
    return next((p for p in search_paths if os.path.exists(p)), None)
101
+
102
def load_model():
    """Locate the model on disk and load it into the global detector.

    Exits the process with status 1 when no model can be found or when
    loading it fails.
    """
    located = find_model_file()

    if located is None:
        # Tell the user exactly which locations were checked before bailing.
        for line in (
            "\nError: Model file not found!",
            "\nPlease ensure one of the following files exists:",
            "1. model_weights.h5",
            "2. drowsiness_model.h5",
            "3. model/drowsiness_model.h5",
            "4. models/drowsiness_model.h5",
            "\nYou can download the model from Hugging Face Hub or train it using train_model.py",
        ):
            print(line)
        sys.exit(1)

    try:
        detector.load_model(located)
    except Exception as e:
        print(f"\nError loading model: {str(e)}")
        sys.exit(1)
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
def process_frame(frame):
    """Annotate one frame with the face box and drowsiness verdict.

    Returns the (possibly annotated) frame; None input passes through as
    None, and on any failure the frame is returned unmodified.
    """
    if frame is None:
        return None

    try:
        # Normalise colour layout: expand grayscale, drop an alpha channel.
        if frame.ndim == 2:
            frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
        elif frame.shape[2] == 4:
            frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB)

        drowsy_prob, face_coords, error = detector.predict(frame)
        if error or face_coords is None:
            # No face (or preprocessing failed) -> nothing to draw.
            return frame

        x, y, w, h = face_coords
        is_drowsy = drowsy_prob > 0.7
        color = (0, 0, 255) if is_drowsy else (0, 255, 0)
        status = "DROWSY" if is_drowsy else "ALERT"

        cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
        cv2.putText(frame, f"{status} ({drowsy_prob:.2%})",
                    (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)
        return frame

    except Exception as e:
        print(f"Error processing frame: {str(e)}")
        return frame
156
+
157
def process_video(video_input):
    """Run drowsiness detection over a whole video file.

    Args:
        video_input: Path to the input video (as supplied by gr.Video).

    Returns:
        Path to the annotated output video, or None on failure.
    """
    if video_input is None:
        return None

    import tempfile

    cap = None
    out = None
    try:
        # Get input video properties
        cap = cv2.VideoCapture(video_input)
        if not cap.isOpened():
            print(f"Error: cannot open video {video_input}")
            return None

        # Fall back to a sane frame rate when the container reports 0/NaN,
        # which would otherwise produce an unplayable output file.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps or fps <= 0:
            fps = 25.0
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Unique temp path per call: the original hard-coded "temp_output.mp4",
        # so concurrent requests clobbered each other's output.
        fd, temp_output = tempfile.mkstemp(suffix=".mp4")
        os.close(fd)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(temp_output, fourcc, fps, (width, height))

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            processed_frame = process_frame(frame)
            if processed_frame is not None:
                out.write(processed_frame)

        # Release resources
        cap.release()
        out.release()

        # Check if video was created
        if os.path.exists(temp_output) and os.path.getsize(temp_output) > 0:
            return temp_output
        print("Error: Failed to create output video")
        return None

    except Exception as e:
        print(f"Error processing video: {str(e)}")
        return None
    finally:
        # Release on the exception path too; a second release is a no-op.
        if out is not None:
            out.release()
        if cap is not None:
            cap.release()
 
 
 
 
 
203
 
204
def webcam_feed():
    """Yield annotated frames from the default webcam until capture stops.

    Yields:
        Annotated frames from process_frame(); yields a single None if the
        feed fails with an exception.
    """
    cap = None
    try:
        cap = cv2.VideoCapture(0)
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            processed_frame = process_frame(frame)
            if processed_frame is not None:
                yield processed_frame

    except Exception as e:
        print(f"Error processing webcam feed: {str(e)}")
        yield None
    finally:
        # cap may be None if VideoCapture itself raised; the original's
        # bare cap.release() turned that into a NameError masking the
        # real failure.
        if cap is not None:
            cap.release()
222
 
223
# Load the model at startup
# (runs at import time so the first request never races initialisation)
load_model()

# Create interface
with gr.Blocks(title="Driver Drowsiness Detection") as demo:
    gr.Markdown("""
    # 🚗 Driver Drowsiness Detection System
    
    This system detects driver drowsiness using computer vision and deep learning.
    
    ## Features:
    - Real-time webcam monitoring
    - Video file processing
    - Single image analysis
    - Face detection and drowsiness prediction
    """)
    
    with gr.Tabs():
        with gr.Tab("Webcam"):
            gr.Markdown("Real-time drowsiness detection using your webcam")
            webcam_output = gr.Image(label="Live Detection")
            webcam_button = gr.Button("Start Webcam")
            # webcam_feed is a generator; Gradio consumes successive yields.
            webcam_button.click(fn=webcam_feed, inputs=None, outputs=webcam_output)
        
        with gr.Tab("Video"):
            gr.Markdown("Upload a video file for drowsiness detection")
            with gr.Row():
                video_input = gr.Video(label="Input Video")
                video_output = gr.Video(label="Detection Result")
            video_button = gr.Button("Process Video")
            # process_video returns a file path, which gr.Video plays back.
            video_button.click(fn=process_video, inputs=video_input, outputs=video_output)
        
        with gr.Tab("Image"):
            gr.Markdown("Upload an image for drowsiness detection")
            with gr.Row():
                image_input = gr.Image(type="numpy", label="Input Image")
                image_output = gr.Image(label="Detection Result")
            image_button = gr.Button("Process Image")
            # Single-frame path reuses the same annotator as video/webcam.
            image_button.click(fn=process_frame, inputs=image_input, outputs=image_output)

if __name__ == "__main__":
    demo.launch()