dnn_space

Running

App Files Files Community

ckcl committed on May 15, 2025

Commit

0ce4306

verified ·

1 Parent(s): dd8e6c7

Update app.py

Browse files

Files changed (1) hide show

app.py +92 -77

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import gradio as gr
 import torch
-from transformers import ViTForImageClassification, ViTImageProcessor
 import numpy as np
 import cv2
 from PIL import Image
@@ -10,28 +10,49 @@ import os
 class DrowsinessDetector:
     def __init__(self):
         self.model = None
-        self.processor = None
-        self.input_shape = (224, 224, 3)
         self.face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
-        self.id2label = {0: "Notdrowsy", 1: "drowsy"}
-        self.label2id = {"Notdrowsy": 0, "drowsy": 1}
     def load_model(self):
-        """Load the ViT model and processor from Hugging Face Hub"""
         try:
-            model_id = "ckcl/driver-drowsiness-detector"  # 使用你的模型ID
-            self.model = ViTForImageClassification.from_pretrained(
-                model_id,
-                num_labels=2,
-                id2label=self.id2label,
-                label2id=self.label2id,
-                ignore_mismatched_sizes=True
             )
             self.model.eval()
-            self.processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224")
-            print(f"ViT model loaded successfully from {model_id}")
         except Exception as e:
-            print(f"Error loading ViT model: {str(e)}")
             raise
     def detect_face(self, frame):
@@ -45,30 +66,35 @@ class DrowsinessDetector:
         return None, None
     def preprocess_image(self, image):
-        """Preprocess the input image for ViT"""
         if image is None:
             return None
-        pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
-        inputs = self.processor(images=pil_img, return_tensors="pt")
-        return inputs
     def predict(self, image):
-        """Make prediction on the input image using ViT"""
-        if self.model is None or self.processor is None:
-            raise ValueError("未加载模型. 首先呼用 load_model().")
         # Detect face
         face, face_coords = self.detect_face(image)
         if face is None:
-            return None, None, "没有检测到的脸"
         # Preprocess the face image
         inputs = self.preprocess_image(face)
         if inputs is None:
-            return None, None, "错误处理图像"
         # Make prediction
         with torch.no_grad():
-            outputs = self.model(**inputs)
-            logits = outputs.logits
-            probs = torch.softmax(logits, dim=1)
             pred_class = torch.argmax(probs, dim=1).item()
             pred_label = self.id2label[pred_class]
             pred_prob = probs[0, pred_class].item()
@@ -76,53 +102,42 @@ class DrowsinessDetector:
         drowsy_prob = probs[0, 1].item()
         return drowsy_prob, face_coords, None
-# Initialize detector
 detector = DrowsinessDetector()
 def process_image(image):
-    """Process a single image"""
     if image is None:
-        return None, "没有提供图像"
     try:
-        # Convert image to numpy array if it's a PIL Image
-        if isinstance(image, Image.Image):
-            image = np.array(image)
-        # Convert frame to RGB if needed
-        if len(image.shape) == 2:
-            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
-        elif image.shape[2] == 4:
-            image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
         # Make prediction
         drowsy_prob, face_coords, error = detector.predict(image)
         if error:
-            return image, error
-        if face_coords is not None:
-            x, y, w, h = face_coords
-            # Draw rectangle around face
-            color = (0, 0, 255) if drowsy_prob > 0.7 else (0, 255, 0)
-            cv2.rectangle(image, (x, y), (x+w, y+h), color, 2)
-            # Add text
-            status = "昏昏欲睡" if drowsy_prob > 0.7 else "警报"
-            cv2.putText(image, f"{status} ({drowsy_prob:.2%})",
-                       (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)
-            return image, f"Status: {status} (Confidence: {drowsy_prob:.2%})"
-        else:
-            return image, "没有检测到的脸"
     except Exception as e:
-        return image, f"Error processing image: {str(e)}"
 def process_video(video):
     """Process video input"""
     if video is None:
-        return None, "没有提供视频"
     try:
         # Get input video properties
@@ -151,9 +166,9 @@ def process_video(video):
         # Check if video was created
         if os.path.exists(temp_output) and os.path.getsize(temp_output) > 0:
-            return temp_output, "视频成功处理"
         else:
-            return None, "错误：无法创建输出视频"
     except Exception as e:
         return None, f"Error processing video: {str(e)}"
@@ -168,39 +183,39 @@ def process_video(video):
 detector.load_model()
 # Create interface
-with gr.Blocks(title="驾驶员嗜睡检测") as demo:
     gr.Markdown("""
-    # 🚗 驾驶员嗜睡检测系统
-    该系统使用计算机视觉和深度学习来检测驾驶员的嗜睡。
-    ＃＃ 特征：
-     - 图像分析
-     - 视频处理
-     - 面部检测和嗜睡预测
     """)
     with gr.Tabs():
-        with gr.Tab("图像"):
-            gr.Markdown("上传图像以进行嗜睡检测")
             with gr.Row():
-                image_input = gr.Image(label="输入图像", type="numpy")
-                image_output = gr.Image(label="处理的图像")
             with gr.Row():
-                status_output = gr.Textbox(label="状态")
             image_input.change(
                 fn=process_image,
                 inputs=[image_input],
                 outputs=[image_output, status_output]
             )
-        with gr.Tab("视频"):
-            gr.Markdown("上传视频文件以进行嗜睡检测")
             with gr.Row():
-                video_input = gr.Video(label="输入视频")
-                video_output = gr.Video(label="处理的视频")
             with gr.Row():
-                video_status = gr.Textbox(label="状态")
             video_input.change(
                 fn=process_video,
                 inputs=[video_input],

 import gradio as gr
 import torch
+import torch.nn as nn
 import numpy as np
 import cv2
 from PIL import Image
 class DrowsinessDetector:
     def __init__(self):
         """Set up detector state; the classifier itself is attached later by load_model()."""
         # Class-index <-> label-name lookup tables (index 1 is "drowsy").
         self.id2label = {0: "notdrowsy", 1: "drowsy"}
         self.label2id = {name: idx for idx, name in self.id2label.items()}
         # preprocess_image() resizes to the first two entries of this tuple.
         self.input_shape = (64, 64, 3)
         # Frontal-face Haar cascade located through OpenCV's bundled data directory.
         cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
         self.face_cascade = cv2.CascadeClassifier(cascade_path)
         # No network yet; predict() refuses to run while this is None.
         self.model = None
     def load_model(self):
+        """Load the CNN model from Hugging Face Hub"""
         try:
+            model_id = "ckcl/driver-drowsiness-detector"
+            # Load the model configuration
+            config = torch.load(f"{model_id}/config.json")
+            # Create CNN model
+            self.model = nn.Sequential(
+                nn.Conv2d(3, 32, kernel_size=3, padding=1),
+                nn.BatchNorm2d(32),
+                nn.ReLU(),
+                nn.MaxPool2d(2),
+                nn.Conv2d(32, 64, kernel_size=3, padding=1),
+                nn.BatchNorm2d(64),
+                nn.ReLU(),
+                nn.MaxPool2d(2),
+                nn.Conv2d(64, 128, kernel_size=3, padding=1),
+                nn.BatchNorm2d(128),
+                nn.ReLU(),
+                nn.MaxPool2d(2),
+                nn.Flatten(),
+                nn.Linear(128 * 8 * 8, 128),
+                nn.BatchNorm1d(128),
+                nn.ReLU(),
+                nn.Dropout(0.5),
+                nn.Linear(128, 2)
             )
+            # Load the model weights
+            self.model.load_state_dict(torch.load(f"{model_id}/pytorch_model.bin"))
             self.model.eval()
+            print(f"CNN model loaded successfully from {model_id}")
         except Exception as e:
+            print(f"Error loading CNN model: {str(e)}")
             raise
     def detect_face(self, frame):
         return None, None
     def preprocess_image(self, image):
         """Convert an image array into a batched float tensor for the CNN.

         Returns None when ``image`` is None; otherwise a tensor with the
         channel axis moved first and a leading batch axis of size 1.
         """
         if image is None:
             return None
         # The input is treated as BGR and its channel order is swapped.
         rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
         target_size = (self.input_shape[0], self.input_shape[1])
         resized = cv2.resize(rgb, target_size)
         # Scale 0..255 pixel values into 0..1 floats.
         scaled = resized.astype(np.float32) / 255.0
         # HWC -> CHW, then prepend the batch axis.
         tensor = torch.from_numpy(scaled)
         return tensor.permute(2, 0, 1).unsqueeze(0)
     def predict(self, image):
+        """Make prediction on the input image using CNN"""
+        if self.model is None:
+            raise ValueError("Model not loaded. Call load_model() first.")
         # Detect face
         face, face_coords = self.detect_face(image)
         if face is None:
+            return None, None, "No face detected"
         # Preprocess the face image
         inputs = self.preprocess_image(face)
         if inputs is None:
+            return None, None, "Error processing image"
         # Make prediction
         with torch.no_grad():
+            outputs = self.model(inputs)
+            probs = torch.softmax(outputs, dim=1)
             pred_class = torch.argmax(probs, dim=1).item()
             pred_label = self.id2label[pred_class]
             pred_prob = probs[0, pred_class].item()
         drowsy_prob = probs[0, 1].item()
         return drowsy_prob, face_coords, None
+# Create a global instance
 detector = DrowsinessDetector()
 def process_image(image):
     """Annotate an image with the detected face box and drowsiness score.

     Returns an ``(image, status)`` pair. The image is None when no input was
     given, when the detector reports an error, or when an exception occurs;
     otherwise it is the input array with the annotation drawn onto it.
     """
     if image is None:
         return None, "No image provided"
     try:
         drowsy_prob, face_coords, error = detector.predict(image)
         if error:
             return None, error
         if face_coords is None:
             return image, "No face detected"
         x, y, w, h = face_coords
         # NOTE(review): these tuples look BGR-ordered (green / red); if
         # `image` arrives as RGB the second one renders blue - confirm.
         if drowsy_prob < 0.5:
             color = (0, 255, 0)
         else:
             color = (0, 0, 255)
         top_left, bottom_right = (x, y), (x+w, y+h)
         # Drawing happens in place on the caller's array.
         cv2.rectangle(image, top_left, bottom_right, color, 2)
         # Caption sits just above the box.
         state = 'Drowsy' if drowsy_prob >= 0.5 else 'Not Drowsy'
         text = f"{state} ({drowsy_prob:.2f})"
         cv2.putText(image, text, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)
         return image, f"Processed successfully. Drowsiness probability: {drowsy_prob:.2f}"
     except Exception as exc:
         return None, f"Error processing image: {str(exc)}"
 def process_video(video):
     """Process video input"""
     if video is None:
+        return None, "No video provided"
     try:
         # Get input video properties
         # Check if video was created
         if os.path.exists(temp_output) and os.path.getsize(temp_output) > 0:
+            return temp_output, "Video processed successfully"
         else:
+            return None, "Error: Failed to create output video"
     except Exception as e:
         return None, f"Error processing video: {str(e)}"
 detector.load_model()
 # Create interface
+with gr.Blocks(title="Driver Drowsiness Detection") as demo:
     gr.Markdown("""
+    # 🚗 Driver Drowsiness Detection System
+    This system detects driver drowsiness using computer vision and deep learning.
+    ## Features:
+    - Image analysis
+    - Video processing
+    - Face detection and drowsiness prediction
     """)
     with gr.Tabs():
+        with gr.Tab("Image"):
+            gr.Markdown("Upload an image for drowsiness detection")
             with gr.Row():
+                image_input = gr.Image(label="Input Image", type="numpy")
+                image_output = gr.Image(label="Processed Image")
             with gr.Row():
+                status_output = gr.Textbox(label="Status")
             image_input.change(
                 fn=process_image,
                 inputs=[image_input],
                 outputs=[image_output, status_output]
             )
+        with gr.Tab("Video"):
+            gr.Markdown("Upload a video file for drowsiness detection")
             with gr.Row():
+                video_input = gr.Video(label="Input Video")
+                video_output = gr.Video(label="Processed Video")
             with gr.Row():
+                video_status = gr.Textbox(label="Status")
             video_input.change(
                 fn=process_video,
                 inputs=[video_input],