ckcl commited on
Commit
64751dd
·
verified ·
1 Parent(s): 255b286

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +821 -118
app.py CHANGED
@@ -1,59 +1,311 @@
1
  import gradio as gr
2
- import torch
3
- import torch.nn as nn
4
  import numpy as np
5
  import cv2
6
  from PIL import Image
7
  import io
8
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  class DrowsinessDetector:
11
  def __init__(self):
12
  self.model = None
13
- self.input_shape = (64, 64, 3)
14
  self.face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
15
  self.id2label = {0: "notdrowsy", 1: "drowsy"}
16
  self.label2id = {"notdrowsy": 0, "drowsy": 1}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  def load_model(self):
19
- """Load the CNN model from Hugging Face Hub"""
20
  try:
21
- model_id = "ckcl/driver-drowsiness-detector"
22
- # Load the model configuration
23
- config = torch.load(f"{model_id}/config.json")
24
-
25
- # Create CNN model
26
- self.model = nn.Sequential(
27
- nn.Conv2d(3, 32, kernel_size=3, padding=1),
28
- nn.BatchNorm2d(32),
29
- nn.ReLU(),
30
- nn.MaxPool2d(2),
31
-
32
- nn.Conv2d(32, 64, kernel_size=3, padding=1),
33
- nn.BatchNorm2d(64),
34
- nn.ReLU(),
35
- nn.MaxPool2d(2),
36
-
37
- nn.Conv2d(64, 128, kernel_size=3, padding=1),
38
- nn.BatchNorm2d(128),
39
- nn.ReLU(),
40
- nn.MaxPool2d(2),
41
-
42
- nn.Flatten(),
43
- nn.Linear(128 * 8 * 8, 128),
44
- nn.BatchNorm1d(128),
45
- nn.ReLU(),
46
- nn.Dropout(0.5),
47
- nn.Linear(128, 2)
48
- )
49
 
50
- # Load the model weights
51
- self.model.load_state_dict(torch.load(f"{model_id}/pytorch_model.bin"))
52
- self.model.eval()
53
- print(f"CNN model loaded successfully from {model_id}")
54
  except Exception as e:
55
  print(f"Error loading CNN model: {str(e)}")
56
  raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  def detect_face(self, frame):
59
  """Detect face in the frame"""
@@ -71,36 +323,168 @@ class DrowsinessDetector:
71
  return None
72
  # Convert to RGB
73
  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
74
- # Resize to model input size
75
  image = cv2.resize(image, (self.input_shape[0], self.input_shape[1]))
76
  # Normalize
77
  image = image.astype(np.float32) / 255.0
78
- # Convert to tensor and add batch dimension
79
- image = torch.from_numpy(image).permute(2, 0, 1).unsqueeze(0)
80
  return image
81
 
82
  def predict(self, image):
83
- """Make prediction on the input image using CNN"""
84
  if self.model is None:
85
  raise ValueError("Model not loaded. Call load_model() first.")
 
 
 
 
 
 
 
 
 
86
  # Detect face
 
87
  face, face_coords = self.detect_face(image)
 
88
  if face is None:
89
- return None, None, "No face detected"
90
- # Preprocess the face image
 
91
  inputs = self.preprocess_image(face)
92
  if inputs is None:
93
- return None, None, "Error processing image"
94
- # Make prediction
95
- with torch.no_grad():
96
- outputs = self.model(inputs)
97
- probs = torch.softmax(outputs, dim=1)
98
- pred_class = torch.argmax(probs, dim=1).item()
99
- pred_label = self.id2label[pred_class]
100
- pred_prob = probs[0, pred_class].item()
101
- # Return drowsy probability (class 1)
102
- drowsy_prob = probs[0, 1].item()
103
- return drowsy_prob, face_coords, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
  # Create a global instance
106
  detector = DrowsinessDetector()
@@ -111,117 +495,436 @@ def process_image(image):
111
  return None, "No image provided"
112
 
113
  try:
 
 
 
 
 
 
 
114
  # Make prediction
115
- drowsy_prob, face_coords, error = detector.predict(image)
116
 
117
  if error:
118
  return None, error
119
 
120
  if face_coords is None:
121
- return image, "No face detected"
 
 
 
122
 
123
  # Draw bounding box
124
  x, y, w, h = face_coords
125
- color = (0, 255, 0) if drowsy_prob < 0.5 else (0, 0, 255)
126
- cv2.rectangle(image, (x, y), (x+w, y+h), color, 2)
127
 
128
- # Add text
129
- text = f"{'Drowsy' if drowsy_prob >= 0.5 else 'Not Drowsy'} ({drowsy_prob:.2f})"
130
- cv2.putText(image, text, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)
131
 
132
- return image, f"Processed successfully. Drowsiness probability: {drowsy_prob:.2f}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
  except Exception as e:
 
 
 
135
  return None, f"Error processing image: {str(e)}"
136
 
137
- def process_video(video):
138
  """Process video input"""
139
  if video is None:
140
  return None, "No video provided"
141
 
142
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  # Get input video properties
144
- cap = cv2.VideoCapture(video)
145
  fps = cap.get(cv2.CAP_PROP_FPS)
 
 
146
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
147
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
- # Create temporary output video file
150
- temp_output = "temp_output.mp4"
151
- fourcc = cv2.VideoWriter_fourcc(*'mp4v')
152
- out = cv2.VideoWriter(temp_output, fourcc, fps, (width, height))
 
 
 
 
 
 
 
 
 
 
 
153
 
154
  while True:
155
  ret, frame = cap.read()
156
  if not ret:
 
157
  break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
- processed_frame = process_image(frame)[0]
160
- if processed_frame is not None:
161
- out.write(processed_frame)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
 
163
  # Release resources
164
  cap.release()
165
  out.release()
166
 
167
- # Check if video was created
168
- if os.path.exists(temp_output) and os.path.getsize(temp_output) > 0:
169
- return temp_output, "Video processed successfully"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  else:
171
- return None, "Error: Failed to create output video"
 
172
 
173
  except Exception as e:
 
 
 
174
  return None, f"Error processing video: {str(e)}"
175
  finally:
176
- # Clean up temporary file
177
- if 'out' in locals():
178
  out.release()
179
- if 'cap' in locals():
180
  cap.release()
 
 
 
 
 
 
 
181
 
182
- # Load the model at startup
183
- detector.load_model()
 
184
 
185
- # Create interface
186
- with gr.Blocks(title="Driver Drowsiness Detection") as demo:
187
- gr.Markdown("""
188
- # 🚗 Driver Drowsiness Detection System
 
 
 
189
 
190
- This system detects driver drowsiness using computer vision and deep learning.
 
 
 
191
 
192
- ## Features:
193
- - Image analysis
194
- - Video processing
195
- - Face detection and drowsiness prediction
196
- """)
197
 
198
- with gr.Tabs():
199
- with gr.Tab("Image"):
200
- gr.Markdown("Upload an image for drowsiness detection")
201
- with gr.Row():
202
- image_input = gr.Image(label="Input Image", type="numpy")
203
- image_output = gr.Image(label="Processed Image")
204
- with gr.Row():
205
- status_output = gr.Textbox(label="Status")
206
- image_input.change(
207
- fn=process_image,
208
- inputs=[image_input],
209
- outputs=[image_output, status_output]
210
- )
211
-
212
- with gr.Tab("Video"):
213
- gr.Markdown("Upload a video file for drowsiness detection")
214
- with gr.Row():
215
- video_input = gr.Video(label="Input Video")
216
- video_output = gr.Video(label="Processed Video")
217
- with gr.Row():
218
- video_status = gr.Textbox(label="Status")
219
- video_input.change(
220
- fn=process_video,
221
- inputs=[video_input],
222
- outputs=[video_output, video_status]
223
- )
 
 
 
 
 
 
224
 
225
- # Launch the app
226
- if __name__ == "__main__":
227
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
 
 
2
  import numpy as np
3
  import cv2
4
  from PIL import Image
5
  import io
6
  import os
7
+ import json
8
+ import time
9
+ import argparse
10
+ import tensorflow as tf
11
+ from tensorflow import keras
12
+ import dlib
13
+ from scipy.spatial import distance as dist
14
+ import math
15
+ from collections import deque
16
+
17
+ class SpeedDetector:
18
+ def __init__(self, history_size=30):
19
+ self.speed_history = deque(maxlen=history_size)
20
+ self.last_update_time = None
21
+ self.current_speed = 0
22
+ self.speed_change_threshold = 5 # km/h
23
+ self.abnormal_speed_changes = 0
24
+ self.speed_deviation_sum = 0
25
+ self.speed_change_score = 0
26
+
27
+ # For optical flow speed estimation
28
+ self.prev_gray = None
29
+ self.prev_points = None
30
+ self.frame_idx = 0
31
+ self.speed_estimate = 60 # Initial estimate
32
+
33
+ def update_speed(self, speed_km_h):
34
+ """Update with current speed in km/h"""
35
+ current_time = time.time()
36
+
37
+ # Add to history
38
+ self.speed_history.append(speed_km_h)
39
+ self.current_speed = speed_km_h
40
+
41
+ # Not enough data yet
42
+ if len(self.speed_history) < 5:
43
+ return 0
44
+
45
+ # Calculate speed variation metrics
46
+ speed_arr = np.array(self.speed_history)
47
+
48
+ # 1. Standard deviation of speed
49
+ speed_std = np.std(speed_arr)
50
+
51
+ # 2. Detect abrupt changes
52
+ for i in range(1, len(speed_arr)):
53
+ change = abs(speed_arr[i] - speed_arr[i-1])
54
+ if change >= self.speed_change_threshold:
55
+ self.abnormal_speed_changes += 1
56
+
57
+ # 3. Calculate average rate of change
58
+ changes = np.abs(np.diff(speed_arr))
59
+ avg_change = np.mean(changes) if len(changes) > 0 else 0
60
+
61
+ # Combine into a score (0-1 range)
62
+ self.speed_deviation_sum = min(5, speed_std) / 5 # Normalize to 0-1
63
+ abnormal_change_factor = min(1, self.abnormal_speed_changes / 5)
64
+ avg_change_factor = min(1, avg_change / self.speed_change_threshold)
65
+
66
+ # Weighted combination
67
+ self.speed_change_score = (
68
+ 0.4 * self.speed_deviation_sum +
69
+ 0.4 * abnormal_change_factor +
70
+ 0.2 * avg_change_factor
71
+ )
72
+
73
+ return self.speed_change_score
74
+
75
+ def detect_speed_from_frame(self, frame):
76
+ """Detect speed from video frame using optical flow"""
77
+ if frame is None:
78
+ return self.current_speed
79
+
80
+ # Convert frame to grayscale
81
+ gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
82
+
83
+ # For the first frame, initialize points to track
84
+ if self.prev_gray is None or self.frame_idx % 30 == 0: # Reset tracking points every 30 frames
85
+ # Detect good features to track
86
+ mask = np.zeros_like(gray)
87
+ # Focus on the lower portion of the frame (road)
88
+ h, w = gray.shape
89
+ mask[h//2:, :] = 255
90
+
91
+ corners = cv2.goodFeaturesToTrack(gray, maxCorners=100, qualityLevel=0.01, minDistance=10, mask=mask)
92
+ if corners is not None and len(corners) > 0:
93
+ self.prev_points = corners
94
+ self.prev_gray = gray.copy()
95
+ else:
96
+ # No good points to track
97
+ self.frame_idx += 1
98
+ return self.current_speed
99
+
100
+ # Calculate optical flow if we have previous points
101
+ if self.prev_gray is not None and self.prev_points is not None:
102
+ # Calculate optical flow
103
+ new_points, status, _ = cv2.calcOpticalFlowPyrLK(self.prev_gray, gray, self.prev_points, None)
104
+
105
+ # Filter only valid points
106
+ if new_points is not None and status is not None:
107
+ good_new = new_points[status == 1]
108
+ good_old = self.prev_points[status == 1]
109
+
110
+ # Calculate flow magnitude
111
+ if len(good_new) > 0 and len(good_old) > 0:
112
+ flow_magnitudes = np.sqrt(
113
+ np.sum((good_new - good_old)**2, axis=1)
114
+ )
115
+ avg_flow = np.mean(flow_magnitudes) if len(flow_magnitudes) > 0 else 0
116
+
117
+ # Map optical flow to speed change
118
+ # Higher flow = faster movement
119
+ # This is a simplified mapping and would need calibration for real-world use
120
+ flow_threshold = 1.0 # Adjust based on testing
121
+
122
+ if avg_flow > flow_threshold:
123
+ # Movement detected, estimate acceleration
124
+ speed_change = min(5, max(-5, (avg_flow - flow_threshold) * 2))
125
+
126
+ # Add some temporal smoothing to avoid sudden changes
127
+ speed_change = speed_change * 0.3 # Reduce magnitude for smoother change
128
+ else:
129
+ # Minimal movement, slight deceleration (coasting)
130
+ speed_change = -0.1
131
+
132
+ # Update speed with detected change
133
+ self.speed_estimate += speed_change
134
+ # Keep speed in reasonable range
135
+ self.speed_estimate = max(40, min(120, self.speed_estimate))
136
+
137
+ # Update tracking points
138
+ self.prev_points = good_new.reshape(-1, 1, 2)
139
+
140
+ # Update previous gray frame
141
+ self.prev_gray = gray.copy()
142
+
143
+ self.frame_idx += 1
144
+
145
+ # Check for dashboard speedometer (would require more sophisticated OCR in a real system)
146
+ # For now, just use our estimated speed
147
+ detected_speed = self.speed_estimate
148
+
149
+ # Update current speed and trigger speed change detection
150
+ self.update_speed(detected_speed)
151
+
152
+ return detected_speed
153
+
154
+ def get_speed_change_score(self):
155
+ """Return a score from 0-1 indicating abnormal speed changes"""
156
+ return self.speed_change_score
157
+
158
+ def reset(self):
159
+ """Reset the detector state"""
160
+ self.speed_history.clear()
161
+ self.abnormal_speed_changes = 0
162
+ self.speed_deviation_sum = 0
163
+ self.speed_change_score = 0
164
+ self.prev_gray = None
165
+ self.prev_points = None
166
+ self.frame_idx = 0
167
+ self.speed_estimate = 60 # Reset to initial estimate
168
 
169
  class DrowsinessDetector:
170
  def __init__(self):
171
  self.model = None
172
+ self.input_shape = (224, 224, 3) # Updated to match model's expected input shape
173
  self.face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
174
  self.id2label = {0: "notdrowsy", 1: "drowsy"}
175
  self.label2id = {"notdrowsy": 0, "drowsy": 1}
176
+
177
+ # Speed detector
178
+ self.speed_detector = SpeedDetector()
179
+ self.SPEED_CHANGE_WEIGHT = 0.15 # Weight for speed changes in drowsiness calculation
180
+
181
+ # Try to load dlib and facial landmark predictor (but make it optional)
182
+ self.landmark_detection_enabled = False
183
+ try:
184
+ import dlib
185
+ self.detector = dlib.get_frontal_face_detector()
186
+ # Check if the shape predictor file exists
187
+ predictor_path = "shape_predictor_68_face_landmarks.dat"
188
+ if not os.path.exists(predictor_path):
189
+ print(f"Warning: {predictor_path} not found. Downloading...")
190
+ import urllib.request
191
+ urllib.request.urlretrieve(
192
+ "https://github.com/italojs/facial-landmarks-recognition/raw/master/shape_predictor_68_face_landmarks.dat",
193
+ predictor_path
194
+ )
195
+ self.predictor = dlib.shape_predictor(predictor_path)
196
+ self.landmark_detection_enabled = True
197
+ print("Facial landmark detection enabled")
198
+ except Exception as e:
199
+ print(f"Warning: Facial landmark detection disabled: {e}")
200
+ print("The system will use a simpler detection method. For better accuracy, install CMake and dlib.")
201
+
202
+ # Constants for drowsiness detection
203
+ self.EAR_THRESHOLD = 0.25 # Eye aspect ratio threshold
204
+ self.CONSECUTIVE_FRAMES = 20
205
+ self.ear_counter = 0
206
+ self.GAZE_THRESHOLD = 0.2 # Gaze direction threshold
207
+ self.HEAD_POSE_THRESHOLD = 0.3 # Head pose threshold
208
+
209
+ # Parameters for weighted ensemble
210
+ self.MODEL_WEIGHT = 0.45 # Reduced to accommodate speed factor
211
+ self.EAR_WEIGHT = 0.2
212
+ self.GAZE_WEIGHT = 0.1
213
+ self.HEAD_POSE_WEIGHT = 0.1
214
+
215
+ # For tracking across frames
216
+ self.prev_drowsy_count = 0
217
+ self.drowsy_history = []
218
+ self.current_speed = 0 # Current speed in km/h
219
+
220
+ def update_speed(self, speed_km_h):
221
+ """Update the current speed"""
222
+ self.current_speed = speed_km_h
223
+ return self.speed_detector.update_speed(speed_km_h)
224
+
225
+ def reset_speed_detector(self):
226
+ """Reset the speed detector"""
227
+ self.speed_detector.reset()
228
 
229
  def load_model(self):
230
+ """Load the CNN model from local files"""
231
  try:
232
+ # Use local model files
233
+ config_path = "huggingface_model/config.json"
234
+ model_path = "drowsiness_model.h5"
235
+
236
+ # Load config
237
+ with open(config_path, 'r') as f:
238
+ config = json.load(f)
239
+
240
+ # Load the Keras model directly
241
+ self.model = keras.models.load_model(model_path)
242
+
243
+ # Print model summary for debugging
244
+ print("Model loaded successfully")
245
+ print(f"Model input shape: {self.model.input_shape}")
246
+ self.model.summary()
 
 
 
 
 
 
 
 
 
 
 
 
 
247
 
 
 
 
 
248
  except Exception as e:
249
  print(f"Error loading CNN model: {str(e)}")
250
  raise
251
+
252
+ def eye_aspect_ratio(self, eye):
253
+ """Calculate the eye aspect ratio"""
254
+ # Compute the euclidean distances between the two sets of vertical eye landmarks
255
+ A = dist.euclidean(eye[1], eye[5])
256
+ B = dist.euclidean(eye[2], eye[4])
257
+
258
+ # Compute the euclidean distance between the horizontal eye landmarks
259
+ C = dist.euclidean(eye[0], eye[3])
260
+
261
+ # Calculate the eye aspect ratio
262
+ ear = (A + B) / (2.0 * C)
263
+ return ear
264
+
265
+ def calculate_gaze(self, eye_points, facial_landmarks):
266
+ """Calculate gaze direction"""
267
+ left_eye_region = np.array([(facial_landmarks.part(i).x, facial_landmarks.part(i).y) for i in range(36, 42)])
268
+ right_eye_region = np.array([(facial_landmarks.part(i).x, facial_landmarks.part(i).y) for i in range(42, 48)])
269
+
270
+ # Compute eye centers
271
+ left_eye_center = left_eye_region.mean(axis=0).astype("int")
272
+ right_eye_center = right_eye_region.mean(axis=0).astype("int")
273
+
274
+ # Compute the angle between eye centers
275
+ dY = right_eye_center[1] - left_eye_center[1]
276
+ dX = right_eye_center[0] - left_eye_center[0]
277
+ angle = np.degrees(np.arctan2(dY, dX))
278
+
279
+ # Normalize the angle
280
+ return abs(angle) / 180.0
281
+
282
+ def get_head_pose(self, shape):
283
+ """Calculate the head pose"""
284
+ # Get specific facial landmarks for head pose estimation
285
+ image_points = np.array([
286
+ (shape.part(30).x, shape.part(30).y), # Nose tip
287
+ (shape.part(8).x, shape.part(8).y), # Chin
288
+ (shape.part(36).x, shape.part(36).y), # Left eye left corner
289
+ (shape.part(45).x, shape.part(45).y), # Right eye right corner
290
+ (shape.part(48).x, shape.part(48).y), # Left mouth corner
291
+ (shape.part(54).x, shape.part(54).y) # Right mouth corner
292
+ ], dtype="double")
293
+
294
+ # A simple head pose estimation using the angle of the face
295
+ # Calculate center of the face
296
+ center_x = np.mean([p[0] for p in image_points])
297
+ center_y = np.mean([p[1] for p in image_points])
298
+
299
+ # Calculate angle with respect to vertical
300
+ angle = 0
301
+ if len(image_points) > 2:
302
+ point1 = image_points[0] # Nose
303
+ point2 = image_points[1] # Chin
304
+ angle = abs(math.atan2(point2[1] - point1[1], point2[0] - point1[0]))
305
+
306
+ # Normalize to 0-1 range where 0 is upright and 1 is drooping
307
+ normalized_pose = min(1.0, abs(angle) / (math.pi/2))
308
+ return normalized_pose
309
 
310
  def detect_face(self, frame):
311
  """Detect face in the frame"""
 
323
  return None
324
  # Convert to RGB
325
  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
326
+ # Resize to model input size (224x224)
327
  image = cv2.resize(image, (self.input_shape[0], self.input_shape[1]))
328
  # Normalize
329
  image = image.astype(np.float32) / 255.0
330
+ # Add batch dimension
331
+ image = np.expand_dims(image, axis=0)
332
  return image
333
 
334
  def predict(self, image):
335
+ """Make prediction on the input image using multiple features"""
336
  if self.model is None:
337
  raise ValueError("Model not loaded. Call load_model() first.")
338
+
339
+ # Initialize results
340
+ drowsy_prob = 0.0
341
+ face_coords = None
342
+ ear_value = 1.0 # Default to wide open eyes
343
+ gaze_value = 0.0
344
+ head_pose_value = 0.0
345
+ landmark_detection_success = False
346
+
347
  # Detect face
348
+ gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
349
  face, face_coords = self.detect_face(image)
350
+
351
  if face is None:
352
+ return 0.0, None, "No face detected", {}
353
+
354
+ # Get model prediction
355
  inputs = self.preprocess_image(face)
356
  if inputs is None:
357
+ return 0.0, face_coords, "Error processing image", {}
358
+
359
+ outputs = self.model.predict(inputs)
360
+ # Get the drowsiness probability from the model
361
+ if outputs.shape[1] == 1:
362
+ model_prob = outputs[0][0]
363
+ # Convert to probability if needed
364
+ if model_prob < 0 or model_prob > 1:
365
+ model_prob = 1 / (1 + np.exp(-model_prob))
366
+ else:
367
+ # For multi-class model
368
+ probs = tf.nn.softmax(outputs, axis=1).numpy()
369
+ model_prob = probs[0, 1] # Probability of class 1 (drowsy)
370
+
371
+ # Get speed change score from detector
372
+ speed_change_score = self.speed_detector.get_speed_change_score()
373
+
374
+ # Get additional features if landmark detection is enabled
375
+ metrics = {
376
+ "model_prob": model_prob,
377
+ "ear": 1.0,
378
+ "gaze": 0.0,
379
+ "head_pose": 0.0,
380
+ "speed_change": speed_change_score
381
+ }
382
+
383
+ if self.landmark_detection_enabled:
384
+ try:
385
+ # Import dlib here to avoid errors if it's not installed
386
+ import dlib
387
+ from scipy.spatial import distance as dist
388
+
389
+ # Detect faces with dlib for landmark detection
390
+ rects = self.detector(gray, 0)
391
+
392
+ if len(rects) > 0:
393
+ # Get facial landmarks
394
+ shape = self.predictor(gray, rects[0])
395
+
396
+ # Get eye aspect ratio
397
+ left_eye = [(shape.part(i).x, shape.part(i).y) for i in range(36, 42)]
398
+ right_eye = [(shape.part(i).x, shape.part(i).y) for i in range(42, 48)]
399
+
400
+ left_ear = self.eye_aspect_ratio(left_eye)
401
+ right_ear = self.eye_aspect_ratio(right_eye)
402
+ ear_value = (left_ear + right_ear) / 2.0
403
+
404
+ # Get gaze direction
405
+ gaze_value = self.calculate_gaze(None, shape)
406
+
407
+ # Get head pose
408
+ head_pose_value = self.get_head_pose(shape)
409
+
410
+ # Update metrics
411
+ metrics["ear"] = ear_value
412
+ metrics["gaze"] = gaze_value
413
+ metrics["head_pose"] = head_pose_value
414
+
415
+ landmark_detection_success = True
416
+ except Exception as e:
417
+ print(f"Error in landmark detection: {e}")
418
+ else:
419
+ # Use a simplified heuristic approach when dlib is not available
420
+ # Calculate an estimated eye ratio from the grayscale intensity in eye regions
421
+ # This is a simplified approach that is not as accurate as the EAR method
422
+ if face_coords is not None:
423
+ try:
424
+ # Try to estimate eye regions based on face proportions
425
+ face_gray = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
426
+ face_height, face_width = face_gray.shape[:2]
427
+
428
+ # Estimate eye regions (these are approximate and may not be accurate for all faces)
429
+ left_eye_region = face_gray[int(face_height*0.2):int(face_height*0.4), int(face_width*0.2):int(face_width*0.4)]
430
+ right_eye_region = face_gray[int(face_height*0.2):int(face_height*0.4), int(face_width*0.6):int(face_width*0.8)]
431
+
432
+ # Simplified metric: use average intensity - lower values might indicate closed eyes
433
+ if left_eye_region.size > 0 and right_eye_region.size > 0:
434
+ left_eye_avg = np.mean(left_eye_region) / 255.0
435
+ right_eye_avg = np.mean(right_eye_region) / 255.0
436
+
437
+ # Invert so that darker regions (potentially closed eyes) have higher values
438
+ left_eye_closed = 1.0 - left_eye_avg
439
+ right_eye_closed = 1.0 - right_eye_avg
440
+
441
+ # Combine into a simple eye closure metric (0-1 range, higher means more closed)
442
+ eye_closure = (left_eye_closed + right_eye_closed) / 2.0
443
+
444
+ # Convert to a rough approximation of EAR
445
+ # Lower values indicate more closed eyes (like EAR)
446
+ estimated_ear = max(0.15, 0.4 - (eye_closure * 0.25))
447
+ ear_value = estimated_ear
448
+ metrics["ear"] = ear_value
449
+ except Exception as e:
450
+ print(f"Error in simplified eye detection: {e}")
451
+
452
+ # Combine features for final drowsiness probability
453
+ if landmark_detection_success:
454
+ # Calculate eye state factor (1.0 when eyes closed, 0.0 when fully open)
455
+ eye_state = max(0, min(1, (self.EAR_THRESHOLD - ear_value) * 5))
456
+
457
+ # Weight the factors
458
+ weighted_avg = (
459
+ self.MODEL_WEIGHT * model_prob +
460
+ self.EAR_WEIGHT * eye_state +
461
+ self.GAZE_WEIGHT * gaze_value +
462
+ self.HEAD_POSE_WEIGHT * head_pose_value +
463
+ self.SPEED_CHANGE_WEIGHT * speed_change_score # Add speed change factor
464
+ )
465
+
466
+ # Update drowsy probability
467
+ drowsy_prob = weighted_avg
468
+ else:
469
+ # If landmark detection failed, use simplified approach
470
+ # Use model probability with higher weight
471
+ if "ear" in metrics and metrics["ear"] < 1.0:
472
+ # We have the simplified eye metric
473
+ eye_state = max(0, min(1, (self.EAR_THRESHOLD - metrics["ear"]) * 5))
474
+ drowsy_prob = (self.MODEL_WEIGHT * model_prob) + ((1 - self.MODEL_WEIGHT - self.SPEED_CHANGE_WEIGHT) * eye_state) + (self.SPEED_CHANGE_WEIGHT * speed_change_score)
475
+ else:
476
+ # Only model and speed are available
477
+ drowsy_prob = (model_prob * 0.85) + (speed_change_score * 0.15)
478
+
479
+ # Apply smoothing with history
480
+ self.drowsy_history.append(drowsy_prob)
481
+ if len(self.drowsy_history) > 10:
482
+ self.drowsy_history.pop(0)
483
+
484
+ # Use median filtering for robustness
485
+ drowsy_prob = np.median(self.drowsy_history)
486
+
487
+ return drowsy_prob, face_coords, None, metrics
488
 
489
  # Create a global instance
490
  detector = DrowsinessDetector()
 
495
  return None, "No image provided"
496
 
497
  try:
498
+ # Check for valid image
499
+ if image.size == 0 or image.shape[0] == 0 or image.shape[1] == 0:
500
+ return None, "Invalid image dimensions"
501
+
502
+ # Make a copy of the image to avoid modifying the original
503
+ processed_image = image.copy()
504
+
505
  # Make prediction
506
+ drowsy_prob, face_coords, error, metrics = detector.predict(processed_image)
507
 
508
  if error:
509
  return None, error
510
 
511
  if face_coords is None:
512
+ # No face detected - add text to the image and return it
513
+ cv2.putText(processed_image, "No face detected", (30, 30),
514
+ cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 0, 255), 2)
515
+ return processed_image, "No face detected"
516
 
517
  # Draw bounding box
518
  x, y, w, h = face_coords
 
 
519
 
520
+ # Use a higher threshold (0.7) to reduce false positives
521
+ is_drowsy = drowsy_prob >= 0.7
 
522
 
523
+ # Determine alert level and color
524
+ if drowsy_prob >= 0.85:
525
+ alert_level = "High Risk"
526
+ color = (0, 0, 255) # Red
527
+ elif drowsy_prob >= 0.7:
528
+ alert_level = "Medium Risk"
529
+ color = (0, 165, 255) # Orange
530
+ else:
531
+ alert_level = "Alert"
532
+ color = (0, 255, 0) # Green
533
+
534
+ cv2.rectangle(processed_image, (x, y), (x+w, y+h), color, 2)
535
+
536
+ # Add the metrics as text on image
537
+ y_offset = 25
538
+ cv2.putText(processed_image, f"{'Drowsy' if is_drowsy else 'Alert'} ({drowsy_prob:.2f})",
539
+ (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)
540
+
541
+ # Add alert level
542
+ cv2.putText(processed_image, alert_level, (x, y-35),
543
+ cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)
544
+
545
+ # Add metrics in bottom left
546
+ cv2.putText(processed_image, f"Model: {metrics['model_prob']:.2f}", (10, processed_image.shape[0]-10-y_offset*3),
547
+ cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
548
+ cv2.putText(processed_image, f"Eye Ratio: {metrics['ear']:.2f}", (10, processed_image.shape[0]-10-y_offset*2),
549
+ cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
550
+ cv2.putText(processed_image, f"Head Pose: {metrics['head_pose']:.2f}", (10, processed_image.shape[0]-10-y_offset),
551
+ cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
552
+
553
+ # Add confidence disclaimer for high model probabilities but good eye metrics
554
+ if metrics['model_prob'] > 0.9 and metrics['ear'] > 0.25:
555
+ cv2.putText(processed_image, "Model conflict - verify manually",
556
+ (10, processed_image.shape[0]-10-y_offset*4),
557
+ cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 165, 255), 1)
558
+
559
+ return processed_image, f"Processed successfully. Drowsiness: {drowsy_prob:.2f}, Alert level: {alert_level}"
560
 
561
  except Exception as e:
562
+ import traceback
563
+ error_details = traceback.format_exc()
564
+ print(f"Error processing image: {str(e)}\n{error_details}")
565
  return None, f"Error processing image: {str(e)}"
566
 
567
+ def process_video(video, initial_speed=60):
568
  """Process video input"""
569
  if video is None:
570
  return None, "No video provided"
571
 
572
  try:
573
+ # 创建内存缓冲区而不是临时文件
574
+ temp_input = None
575
+
576
+ # Handle video input (can be file path or video data)
577
+ if isinstance(video, str):
578
+ print(f"Processing video from path: {video}")
579
+ # 直接读取原始文件,不复制到临时目录
580
+ cap = cv2.VideoCapture(video)
581
+ else:
582
+ print(f"Processing video from uploaded data")
583
+ # 读取上传的视频数据到内存
584
+ import tempfile
585
+ temp_input = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
586
+ temp_input_path = temp_input.name
587
+ with open(temp_input_path, "wb") as f:
588
+ f.write(video)
589
+ cap = cv2.VideoCapture(temp_input_path)
590
+
591
+ if not cap.isOpened():
592
+ return None, "Error: Could not open video"
593
+
594
  # Get input video properties
 
595
  fps = cap.get(cv2.CAP_PROP_FPS)
596
+ if fps <= 0:
597
+ fps = 30 # Default to 30fps if invalid
598
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
599
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
600
+ total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
601
+
602
+ print(f"Video properties: {width}x{height} at {fps}fps, total frames: {total_frames}")
603
+
604
+ # 创建内存缓冲区而不是临时输出文件
605
+ import io
606
+ import base64
607
+
608
+ # 使用临时文件来存储处理后的视频(处理完毕后会删除)
609
+ import tempfile
610
+ temp_output = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
611
+ temp_output_path = temp_output.name
612
+
613
+ # Try different codecs on Windows
614
+ if os.name == 'nt': # Windows
615
+ # 使用mp4v编码以确保兼容性
616
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
617
+ else:
618
+ # On other platforms, use MP4V
619
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
620
+
621
+ # Create video writer
622
+ out = cv2.VideoWriter(temp_output_path, fourcc, fps, (width, height))
623
+ if not out.isOpened():
624
+ return None, "Error: Could not create output video file"
625
 
626
+ # Reset speed detector at the start of each video
627
+ detector.reset_speed_detector()
628
+
629
+ # Initialize speed value with the provided initial speed
630
+ current_speed = initial_speed
631
+ detector.speed_detector.speed_estimate = initial_speed
632
+
633
+ # Process each frame
634
+ frame_count = 0
635
+ processed_count = 0
636
+ face_detected_count = 0
637
+ drowsy_count = 0
638
+ high_risk_count = 0
639
+ ear_sum = 0
640
+ model_prob_sum = 0
641
 
642
  while True:
643
  ret, frame = cap.read()
644
  if not ret:
645
+ print(f"End of video or error reading frame at frame {frame_count}")
646
  break
647
+
648
+ frame_count += 1
649
+
650
+ # Detect speed from the current frame
651
+ current_speed = detector.speed_detector.detect_speed_from_frame(frame)
652
+
653
+ try:
654
+ # Try to process the frame
655
+ processed_frame, message = process_image(frame)
656
+
657
+ # Add speed info to the frame
658
+ if processed_frame is not None:
659
+ speed_text = f"Speed: {current_speed:.1f} km/h"
660
+ cv2.putText(processed_frame, speed_text, (10, processed_frame.shape[0]-45),
661
+ cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
662
+
663
+ # Add speed change score
664
+ speed_change_score = detector.speed_detector.get_speed_change_score()
665
+ cv2.putText(processed_frame, f"Speed Variation: {speed_change_score:.2f}",
666
+ (10, processed_frame.shape[0]-70),
667
+ cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
668
 
669
+ if processed_frame is not None:
670
+ out.write(processed_frame)
671
+ processed_count += 1
672
+ if "No face detected" not in message:
673
+ face_detected_count += 1
674
+ if "Drowsiness" in message:
675
+ # Extract drowsiness probability
676
+ try:
677
+ drowsy_text = message.split("Drowsiness: ")[1].split(",")[0]
678
+ drowsy_prob = float(drowsy_text)
679
+
680
+ # Track drowsiness stats
681
+ if drowsy_prob >= 0.7:
682
+ drowsy_count += 1
683
+ if drowsy_prob >= 0.85:
684
+ high_risk_count += 1
685
+
686
+ # Get metrics from the frame
687
+ _, _, _, metrics = detector.predict(frame)
688
+ if 'ear' in metrics:
689
+ ear_sum += metrics['ear']
690
+ if 'model_prob' in metrics:
691
+ model_prob_sum += metrics['model_prob']
692
+ except:
693
+ pass
694
+ else:
695
+ # Fallback: If processing fails, just use the original frame
696
+ # Add text indicating processing failed
697
+ cv2.putText(frame, "Processing failed", (30, 30),
698
+ cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
699
+ out.write(frame)
700
+ processed_count += 1
701
+ print(f"Frame {frame_count}: Processing failed - {message}")
702
+ except Exception as e:
703
+ # If any error occurs during processing, use original frame
704
+ cv2.putText(frame, f"Error: {str(e)[:30]}", (30, 30),
705
+ cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
706
+ out.write(frame)
707
+ processed_count += 1
708
+ print(f"Frame {frame_count}: Exception - {str(e)}")
709
+
710
+ # Print progress for every 10th frame
711
+ if frame_count % 10 == 0:
712
+ print(f"Processed {frame_count}/{total_frames} frames")
713
 
714
  # Release resources
715
  cap.release()
716
  out.release()
717
 
718
+ # Calculate statistics
719
+ drowsy_percentage = (drowsy_count / face_detected_count * 100) if face_detected_count > 0 else 0
720
+ high_risk_percentage = (high_risk_count / face_detected_count * 100) if face_detected_count > 0 else 0
721
+ avg_ear = ear_sum / face_detected_count if face_detected_count > 0 else 0
722
+ avg_model_prob = model_prob_sum / face_detected_count if face_detected_count > 0 else 0
723
+ speed_score = detector.speed_detector.get_speed_change_score()
724
+
725
+ # Check if video was created successfully and return it directly
726
+ if os.path.exists(temp_output_path) and os.path.getsize(temp_output_path) > 0:
727
+ print(f"Video processed successfully with {processed_count} frames")
728
+ print(f"Drowsy frames: {drowsy_count} ({drowsy_percentage:.1f}%), High risk frames: {high_risk_count} ({high_risk_percentage:.1f}%)")
729
+ print(f"Average eye ratio: {avg_ear:.2f}, Average model probability: {avg_model_prob:.2f}")
730
+ print(f"Speed change score: {speed_score:.2f}")
731
+
732
+ # If model prob is high but eye ratio is also high (open eyes), flag potential false positive
733
+ false_positive_warning = ""
734
+ if avg_model_prob > 0.8 and avg_ear > 0.25:
735
+ false_positive_warning = " ⚠️ Possible false positive (eyes open but model detects drowsiness)"
736
+
737
+ result_message = (f"Video processed successfully. Frames: {frame_count}, faces detected: {face_detected_count}, "
738
+ f"drowsy: {drowsy_count} ({drowsy_percentage:.1f}%), high risk: {high_risk_count} ({high_risk_percentage:.1f}%)."
739
+ f" Avg eye ratio: {avg_ear:.2f}, Speed score: {speed_score:.2f}{false_positive_warning}")
740
+
741
+ # 直接返回文件而不保留它
742
+ video_result = temp_output_path
743
+
744
+ return video_result, result_message
745
  else:
746
+ print(f"Failed to create output video. Frames read: {frame_count}, processed: {processed_count}")
747
+ return None, f"Error: Failed to create output video. Frames read: {frame_count}, processed: {processed_count}"
748
 
749
  except Exception as e:
750
+ import traceback
751
+ error_details = traceback.format_exc()
752
+ print(f"Error processing video: {str(e)}\n{error_details}")
753
  return None, f"Error processing video: {str(e)}"
754
  finally:
755
+ # Clean up resources
756
+ if 'out' in locals() and out is not None:
757
  out.release()
758
+ if 'cap' in locals() and cap is not None:
759
  cap.release()
760
+
761
+ # 删除临时输入文件(如果存在)
762
+ if temp_input is not None:
763
+ try:
764
+ os.unlink(temp_input.name)
765
+ except:
766
+ pass
767
 
768
def process_webcam(image):
    """Handle a single webcam frame.

    Delegates to process_image so the webcam path shares exactly the
    same detection pipeline as uploaded images; returns the annotated
    frame together with a status message.
    """
    return process_image(image)
771
 
772
# App entry point: parse CLI flags, set up cleanup, then build and launch the UI.
if __name__ == "__main__":
    # Parse command line arguments
    parser = argparse.ArgumentParser(description="Driver Drowsiness Detection App")
    parser.add_argument("--share", action="store_true", help="Create a public link (may trigger security warnings)")
    parser.add_argument("--port", type=int, default=7860, help="Port to run the app on")
    args = parser.parse_args()

    # Warn loudly when the app is about to be exposed to the internet.
    if args.share:
        print("WARNING: Running with --share may trigger security warnings on some systems.")
        print("The app will be accessible from the internet through a temporary URL.")

    # Modules used by the exit-time cleanup below.
    import atexit
    import glob
    import shutil

    def cleanup_temp_files():
        """Delete leftover temporary video files created during this session.

        Registered with atexit. FIX: the previous version removed every
        "tmp*" file in the system temp directory, which could delete temp
        files owned by unrelated processes. The sweep is now limited to
        tempfile-style video files (tmp*.mp4 / tmp*.avi) — the only files
        this app creates there via tempfile.NamedTemporaryFile.
        """
        import tempfile
        try:
            temp_dir = tempfile.gettempdir()
            for ext in (".mp4", ".avi"):
                # NamedTemporaryFile uses the "tmp" prefix by default, so this
                # pattern matches exactly the outputs process_video produces.
                for file in glob.glob(os.path.join(temp_dir, f"tmp*{ext}")):
                    try:
                        if os.path.isfile(file):
                            os.remove(file)
                    except Exception as e:
                        print(f"Failed to delete {file}: {e}")

            print("Cleaned up temporary files")
        except Exception as e:
            # Cleanup is best-effort; never let it crash interpreter shutdown.
            print(f"Error during cleanup: {e}")
816
+
817
    # Register the cleanup function to run at interpreter exit, so temporary
    # video files produced during the session are removed.
    atexit.register(cleanup_temp_files)

    # Load the model once at startup so the first request does not pay the cost.
    detector.load_model()

    # Build the Gradio interface. Widget construction order defines the
    # on-screen layout, so the statements below keep their exact order.
    with gr.Blocks(title="Driver Drowsiness Detection") as demo:
        gr.Markdown("""
        # 🚗 Driver Drowsiness Detection System
        
        This system detects driver drowsiness using computer vision and deep learning.
        
        ## Features:
        - Image analysis
        - Video processing with speed monitoring
        - Webcam detection (PC and mobile)
        - Multi-factor drowsiness prediction (face, eyes, head pose, speed changes)
        """)
        
        with gr.Tabs():
            # --- Tab 1: single-image drowsiness detection ---
            with gr.Tab("Image"):
                gr.Markdown("Upload an image for drowsiness detection")
                with gr.Row():
                    image_input = gr.Image(label="Input Image", type="numpy")
                    image_output = gr.Image(label="Processed Image")
                with gr.Row():
                    status_output = gr.Textbox(label="Status")
                # Re-run detection whenever a new image is supplied.
                image_input.change(
                    fn=process_image,
                    inputs=[image_input],
                    outputs=[image_output, status_output]
                )
            
            # --- Tab 2: uploaded-video analysis with optical-flow speed detection ---
            with gr.Tab("Video"):
                # The Markdown below is user-facing text (Traditional Chinese)
                # and is deliberately left untranslated.
                gr.Markdown("""
                ### 上傳駕駛視頻進行困倦檢測
                
                系統將自動從視頻中檢測以下內容:
                - 駕駛員面部表情和眼睛狀態
                - 車輛速度變化 (通過視頻中的光流分析)
                - 當車速變化超過 ±5 km/h 時將被視為異常駕駛行為
                
                **注意:** 處理後的視頻不會保存到本地文件夾,請使用界面右上角的下載按鈕保存結果。
                """)
                with gr.Row():
                    video_input = gr.Video(label="輸入視頻")
                    video_output = gr.Video(label="處理後視頻 (點擊右上角下載)")
                with gr.Row():
                    # Seed value only; process_video refines the estimate from
                    # optical flow on the actual frames.
                    initial_speed = gr.Slider(minimum=10, maximum=120, value=60, label="初始車速估計值 (km/h)",
                                              info="僅作為初始估計值,系統會自動從視頻中檢測實際速度變化")
                with gr.Row():
                    video_status = gr.Textbox(label="處理狀態")
                with gr.Row():
                    process_btn = gr.Button("處理視頻")
                    clear_btn = gr.Button("清除")
                
                process_btn.click(
                    fn=process_video,
                    inputs=[video_input, initial_speed],
                    outputs=[video_output, video_status]
                )
                
                # Clear button resets the output video and the status text.
                clear_btn.click(
                    fn=lambda: (None, "已清除結果"),
                    inputs=[],
                    outputs=[video_output, video_status]
                )
            
            # --- Tab 3: live webcam detection ---
            with gr.Tab("Webcam"):
                gr.Markdown("Use your webcam or mobile camera for real-time drowsiness detection")
                with gr.Row():
                    # NOTE(review): `source=` / `streaming=` are Gradio 3.x
                    # arguments (removed in Gradio 4) — confirm the pinned
                    # gradio version supports them.
                    webcam_input = gr.Image(source="webcam", streaming=True, label="Camera Feed", type="numpy")
                    webcam_output = gr.Image(label="Processed Feed")
                with gr.Row():
                    speed_input = gr.Slider(minimum=0, maximum=150, value=60, label="Current Speed (km/h)")
                    update_speed_btn = gr.Button("Update Speed")
                with gr.Row():
                    webcam_status = gr.Textbox(label="Status")
                
                def process_webcam_with_speed(image, speed):
                    """Push the manually-set speed into the detector, then run
                    the shared image pipeline on the webcam frame."""
                    detector.update_speed(speed)
                    return process_image(image)
                
                # The button only confirms the new speed in the status box; the
                # slider value itself is read on every streamed frame below.
                update_speed_btn.click(
                    fn=lambda speed: f"Speed updated to {speed} km/h",
                    inputs=[speed_input],
                    outputs=[webcam_status]
                )
                
                webcam_input.change(
                    fn=process_webcam_with_speed,
                    inputs=[webcam_input, speed_input],
                    outputs=[webcam_output, webcam_status]
                )
        
        gr.Markdown("""
        ## How It Works
        This system detects drowsiness using multiple factors:
        1. **Facial features** - Using a trained CNN model
        2. **Eye openness** - Measuring eye aspect ratio (EAR)
        3. **Head position** - Detecting head drooping
        4. **Automatic speed detection** - Using optical flow analysis to track vehicle movement and detect irregular speed changes
        
        The system automatically detects speed changes from the video frames using computer vision techniques:
        - **Optical flow** is used to track movement between frames
        - **Irregular speed changes** (±5 km/h) are detected as potential signs of drowsy driving
        - **No external speed data required** - everything is analyzed directly from the video content
        
        Combining these factors provides more reliable drowsiness detection than using facial features alone.
        """)
    
    # Launch the app on the configured port; --share exposes a public URL.
    demo.launch(share=args.share, server_port=args.port)