dnn_space

Running

App Files Files Community

ckcl committed on May 14, 2025

Commit

56df4e3

verified ·

1 Parent(s): 7e775cf

Update app.py

Browse files

Files changed (1) hide show

app.py +211 -263

app.py CHANGED Viewed

@@ -1,264 +1,212 @@
-import gradio as gr
-import torch
-from transformers import ViTForImageClassification, ViTImageProcessor
-import numpy as np
-import cv2
-from PIL import Image
-import io
-import os
-import sys
-import time
-class DrowsinessDetector:
-    def __init__(self):
-        self.model = None
-        self.processor = None
-        self.input_shape = (224, 224, 3)
-        self.face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
-        self.id2label = {0: "notdrowsy", 1: "drowsy"}
-        self.label2id = {"notdrowsy": 0, "drowsy": 1}
-    def load_model(self, model_path):
-        """Load the ViT model and processor from the specified path or directory"""
-        try:
-            self.model = ViTForImageClassification.from_pretrained(
-                model_path,  # 直接給資料夾路徑
-                num_labels=2,
-                id2label=self.id2label,
-                label2id=self.label2id,
-                ignore_mismatched_sizes=True
-            )
-            self.model.eval()
-            self.processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224")
-            print(f"ViT model loaded successfully from {model_path}")
-        except Exception as e:
-            print(f"Error loading ViT model: {str(e)}")
-            raise
-    def detect_face(self, frame):
-        """Detect face in the frame"""
-        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-        faces = self.face_cascade.detectMultiScale(gray, 1.1, 4)
-        if len(faces) > 0:
-            (x, y, w, h) = faces[0]  # Get the first face
-            face = frame[y:y+h, x:x+w]
-            return face, (x, y, w, h)
-        return None, None
-    def preprocess_image(self, image):
-        """Preprocess the input image for ViT"""
-        if image is None:
-            return None
-        pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
-        inputs = self.processor(images=pil_img, return_tensors="pt")
-        return inputs
-    def predict(self, image):
-        """Make prediction on the input image using ViT"""
-        if self.model is None or self.processor is None:
-            raise ValueError("Model not loaded. Call load_model() first.")
-        # Detect face
-        face, face_coords = self.detect_face(image)
-        if face is None:
-            return None, None, "No face detected"
-        # Preprocess the face image
-        inputs = self.preprocess_image(face)
-        if inputs is None:
-            return None, None, "Error processing image"
-        # Make prediction
-        with torch.no_grad():
-            outputs = self.model(**inputs)
-            logits = outputs.logits
-            probs = torch.softmax(logits, dim=1)
-            pred_class = torch.argmax(probs, dim=1).item()
-            pred_label = self.id2label[pred_class]
-            pred_prob = probs[0, pred_class].item()
-        # Return drowsy probability (class 1)
-        drowsy_prob = probs[0, 1].item()
-        return drowsy_prob, face_coords, None
-# Initialize detector
-detector = DrowsinessDetector()
-def find_model_file():
-    """Find the model directory or file in common locations"""
-    possible_paths = [
-        "huggingface_model",  # 優先資料夾
-        "pytorch_model.bin",
-        "model_weights.h5",
-        "drowsiness_model.h5",
-        "model/drowsiness_model.h5",
-        "models/drowsiness_model.h5",
-        "huggingface_model/model_weights.h5",
-        "huggingface_model/drowsiness_model.h5",
-        "../model_weights.h5",
-        "../drowsiness_model.h5"
-    ]
-    for path in possible_paths:
-        if os.path.exists(path):
-            return path
-    return None
-def load_model():
-    """Load the model"""
-    model_path = find_model_file()
-    if model_path is None:
-        print("\nError: Model file not found!")
-        print("\nPlease ensure one of the following files exists:")
-        print("1. model_weights.h5")
-        print("2. drowsiness_model.h5")
-        print("3. model/drowsiness_model.h5")
-        print("4. models/drowsiness_model.h5")
-        print("\nYou can download the model from Hugging Face Hub or train it using train_model.py")
-        sys.exit(1)
-    try:
-        detector.load_model(model_path)
-    except Exception as e:
-        print(f"\nError loading model: {str(e)}")
-        sys.exit(1)
-def process_frame(frame):
-    """Process a single frame"""
-    if frame is None:
-        return None
-    try:
-        # Convert frame to RGB if needed
-        if len(frame.shape) == 2:
-            frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
-        elif frame.shape[2] == 4:
-            frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB)
-        # Make prediction
-        drowsy_prob, face_coords, error = detector.predict(frame)
-        if error:
-            return frame
-        if face_coords is not None:
-            x, y, w, h = face_coords
-            # Draw rectangle around face
-            color = (0, 0, 255) if drowsy_prob > 0.7 else (0, 255, 0)
-            cv2.rectangle(frame, (x, y), (x+w, y+h), color, 2)
-            # Add text
-            status = "DROWSY" if drowsy_prob > 0.7 else "ALERT"
-            cv2.putText(frame, f"{status} ({drowsy_prob:.2%})",
-                       (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)
-        return frame
-    except Exception as e:
-        print(f"Error processing frame: {str(e)}")
-        return frame
-def process_video(video_input):
-    """Process video input"""
-    if video_input is None:
-        return None
-    try:
-        # Get input video properties
-        cap = cv2.VideoCapture(video_input)
-        fps = cap.get(cv2.CAP_PROP_FPS)
-        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        # Create temporary output video file
-        temp_output = "temp_output.mp4"
-        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-        out = cv2.VideoWriter(temp_output, fourcc, fps, (width, height))
-        while True:
-            ret, frame = cap.read()
-            if not ret:
-                break
-            processed_frame = process_frame(frame)
-            if processed_frame is not None:
-                out.write(processed_frame)
-        # Release resources
-        cap.release()
-        out.release()
-        # Check if video was created
-        if os.path.exists(temp_output) and os.path.getsize(temp_output) > 0:
-            return temp_output
-        else:
-            print("Error: Failed to create output video")
-            return None
-    except Exception as e:
-        print(f"Error processing video: {str(e)}")
-        return None
-    finally:
-        # Clean up temporary file
-        if 'out' in locals():
-            out.release()
-        if 'cap' in locals():
-            cap.release()
-def webcam_feed():
-    """Process webcam feed"""
-    try:
-        cap = cv2.VideoCapture(0)
-        while True:
-            ret, frame = cap.read()
-            if not ret:
-                break
-            processed_frame = process_frame(frame)
-            if processed_frame is not None:
-                yield processed_frame
-    except Exception as e:
-        print(f"Error processing webcam feed: {str(e)}")
-        yield None
-    finally:
-        cap.release()
-# Load the model at startup
-load_model()
-# Create interface
-with gr.Blocks(title="Driver Drowsiness Detection") as demo:
-    gr.Markdown("""
-    # 🚗 Driver Drowsiness Detection System
-    This system detects driver drowsiness using computer vision and deep learning.
-    ## Features:
-    - Real-time webcam monitoring
-    - Video file processing
-    - Single image analysis
-    - Face detection and drowsiness prediction
-    """)
-    with gr.Tabs():
-        with gr.Tab("Webcam"):
-            gr.Markdown("Real-time drowsiness detection using your webcam")
-            webcam_output = gr.Image(label="Live Detection")
-            webcam_button = gr.Button("Start Webcam")
-            webcam_button.click(fn=webcam_feed, inputs=None, outputs=webcam_output)
-        with gr.Tab("Video"):
-            gr.Markdown("Upload a video file for drowsiness detection")
-            with gr.Row():
-                video_input = gr.Video(label="Input Video")
-                video_output = gr.Video(label="Detection Result")
-            video_button = gr.Button("Process Video")
-            video_button.click(fn=process_video, inputs=video_input, outputs=video_output)
-        with gr.Tab("Image"):
-            gr.Markdown("Upload an image for drowsiness detection")
-            with gr.Row():
-                image_input = gr.Image(type="numpy", label="Input Image")
-                image_output = gr.Image(label="Detection Result")
-            image_button = gr.Button("Process Image")
-            image_button.click(fn=process_frame, inputs=image_input, outputs=image_output)
-if __name__ == "__main__":
     demo.launch()

+import gradio as gr
+import torch
+from transformers import ViTForImageClassification, ViTImageProcessor
+import numpy as np
+import cv2
+from PIL import Image
+import io
+import os
+class DrowsinessDetector:
+    """Haar-cascade face detector paired with a ViT drowsy/not-drowsy classifier.
+
+    Images handed to this class are expected in OpenCV's BGR channel order:
+    both ``detect_face`` and ``preprocess_image`` convert *from* BGR.
+    """
+    def __init__(self):
+        # Model and processor stay None until load_model() succeeds.
+        self.model = None
+        self.processor = None
+        self.input_shape = (224, 224, 3)
+        self.face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
+        self.id2label = {0: "notdrowsy", 1: "drowsy"}
+        self.label2id = {"notdrowsy": 0, "drowsy": 1}
+    def load_model(self):
+        """Load the ViT classifier and its image processor from the Hugging Face Hub.
+
+        Raises:
+            Exception: whatever ``from_pretrained`` raised, re-raised after
+                the error has been printed.
+        """
+        try:
+            model_id = "ckcl/driver-drowsiness-detector"  # Hub repo holding the classifier weights
+            self.model = ViTForImageClassification.from_pretrained(
+                model_id,
+                num_labels=2,
+                id2label=self.id2label,
+                label2id=self.label2id,
+                ignore_mismatched_sizes=True
+            )
+            self.model.eval()
+            self.processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224")
+            print(f"ViT model loaded successfully from {model_id}")
+        except Exception as e:
+            print(f"Error loading ViT model: {str(e)}")
+            raise
+    def detect_face(self, frame):
+        """Locate the most prominent face in a BGR frame.
+
+        Returns:
+            ``(face, (x, y, w, h))`` where ``face`` is the cropped slice of
+            ``frame`` and the coordinates are plain Python ints, or
+            ``(None, None)`` when the cascade finds nothing.
+        """
+        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+        faces = self.face_cascade.detectMultiScale(gray, 1.1, 4)
+        if len(faces) == 0:
+            return None, None
+        # The cascade's result order is not documented to be meaningful, so
+        # pick the largest box rather than whichever happens to come first.
+        x, y, w, h = (int(v) for v in max(faces, key=lambda box: box[2] * box[3]))
+        return frame[y:y+h, x:x+w], (x, y, w, h)
+    def preprocess_image(self, image):
+        """Convert a BGR array into processor outputs (PyTorch tensors) for the ViT model.
+
+        Returns ``None`` when ``image`` is ``None``.
+        """
+        if image is None:
+            return None
+        pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+        return self.processor(images=pil_img, return_tensors="pt")
+    def predict(self, image):
+        """Estimate the drowsiness probability for the face found in a BGR image.
+
+        Returns:
+            ``(drowsy_prob, face_coords, error)``. On success ``error`` is
+            ``None`` and ``drowsy_prob`` is the softmax probability of class 1
+            ("drowsy"). On failure the first two items are ``None`` and
+            ``error`` is a short message.
+
+        Raises:
+            ValueError: if ``load_model()`` has not been called successfully.
+        """
+        if self.model is None or self.processor is None:
+            raise ValueError("Model not loaded. Call load_model() first.")
+        # Detect face
+        face, face_coords = self.detect_face(image)
+        if face is None:
+            return None, None, "No face detected"
+        # Preprocess the face image
+        inputs = self.preprocess_image(face)
+        if inputs is None:
+            return None, None, "Error processing image"
+        # Inference only, so gradient tracking is disabled.
+        with torch.no_grad():
+            logits = self.model(**inputs).logits
+            probs = torch.softmax(logits, dim=1)
+        # Return drowsy probability (class 1)
+        drowsy_prob = probs[0, 1].item()
+        return drowsy_prob, face_coords, None
+# Module-level detector used by process_image(); its model is still unloaded
+# here and predict() raises ValueError until detector.load_model() has run.
+detector = DrowsinessDetector()
+def _annotate_bgr(frame):
+    """Classify a BGR frame and draw the face box and label onto it in place.
+
+    Returns:
+        The status message for the frame: the detector's error text,
+        "No face detected", or the formatted status line.
+    """
+    drowsy_prob, face_coords, error = detector.predict(frame)
+    if error:
+        return error
+    if face_coords is None:
+        return "No face detected"
+    x, y, w, h = face_coords
+    is_drowsy = drowsy_prob > 0.7
+    # Colors are BGR here: red for drowsy, green for alert.
+    color = (0, 0, 255) if is_drowsy else (0, 255, 0)
+    status = "DROWSY" if is_drowsy else "ALERT"
+    cv2.rectangle(frame, (x, y), (x+w, y+h), color, 2)
+    cv2.putText(frame, f"{status} ({drowsy_prob:.2%})",
+               (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)
+    return f"Status: {status} (Confidence: {drowsy_prob:.2%})"
+def process_image(image):
+    """Annotate a single RGB image (as delivered by the Gradio image input).
+
+    The detector works in BGR, so the image is converted to BGR for detection
+    and drawing, then converted back to RGB for display. Previously the RGB
+    array was passed straight through, which swapped the red and blue
+    channels for the classifier and for the box color.
+
+    Args:
+        image: RGB, RGBA or grayscale numpy array, a PIL image, or ``None``.
+
+    Returns:
+        ``(annotated_rgb_image, status_message)``. The image is ``None`` when
+        no input was given; on an unexpected error the input is returned
+        unchanged alongside the error text.
+    """
+    if image is None:
+        return None, "No image provided"
+    try:
+        # Convert image to numpy array if it's a PIL Image
+        if isinstance(image, Image.Image):
+            image = np.array(image)
+        # Normalise to 3-channel BGR; cvtColor returns a fresh array, so the
+        # caller's buffer is never drawn on.
+        if len(image.shape) == 2:
+            bgr = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
+        elif image.shape[2] == 4:
+            bgr = cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
+        else:
+            bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+        message = _annotate_bgr(bgr)
+        return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB), message
+    except Exception as e:
+        return image, f"Error processing image: {str(e)}"
+def process_video(video):
+    """Annotate every frame of a video file and write the result to a new MP4.
+
+    Args:
+        video: path of the input video, or ``None``.
+
+    Returns:
+        ``(output_path, status_message)``; ``output_path`` is ``None`` on failure.
+    """
+    import tempfile  # local import keeps this block self-contained
+    if video is None:
+        return None, "No video provided"
+    cap = None
+    out = None
+    try:
+        # Get input video properties
+        cap = cv2.VideoCapture(video)
+        if not cap.isOpened():
+            return None, "Error: Could not open input video"
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        if not fps > 0:
+            # Some containers report 0/NaN; the writer needs a positive rate.
+            fps = 30.0
+        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        # Unique output path per request so concurrent requests do not
+        # overwrite each other's result. The file is left on disk for the
+        # caller to serve.
+        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
+            temp_output = tmp.name
+        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+        out = cv2.VideoWriter(temp_output, fourcc, fps, (width, height))
+        while True:
+            ret, frame = cap.read()
+            if not ret:
+                break
+            try:
+                # Frames read by OpenCV are already BGR.
+                _annotate_bgr(frame)
+            except Exception as e:
+                # Best effort: keep the frame in the output even if it
+                # could not be annotated.
+                print(f"Error processing frame: {str(e)}")
+            out.write(frame)
+        # Release before the size check so the writer has finished the file.
+        cap.release()
+        out.release()
+        # Check if video was created
+        if os.path.exists(temp_output) and os.path.getsize(temp_output) > 0:
+            return temp_output, "Video processed successfully"
+        else:
+            return None, "Error: Failed to create output video"
+    except Exception as e:
+        return None, f"Error processing video: {str(e)}"
+    finally:
+        # Release capture/writer handles on every exit path.
+        if out is not None:
+            out.release()
+        if cap is not None:
+            cap.release()
+# Load the model at startup. load_model() re-raises on failure, so the app
+# does not start without a usable model.
+detector.load_model()
+# Create interface: two tabs (image, video), each wiring an input component's
+# change event to its processing function.
+with gr.Blocks(title="Driver Drowsiness Detection") as demo:
+    gr.Markdown("""
+    # 🚗 Driver Drowsiness Detection System
+    This system detects driver drowsiness using computer vision and deep learning.
+    ## Features:
+    - Image analysis
+    - Video processing
+    - Face detection and drowsiness prediction
+    """)
+    with gr.Tabs():
+        with gr.Tab("Image"):
+            gr.Markdown("Upload an image for drowsiness detection")
+            with gr.Row():
+                image_input = gr.Image(label="Input Image", type="numpy")
+                image_output = gr.Image(label="Processed Image")
+            with gr.Row():
+                status_output = gr.Textbox(label="Status")
+            # process_image returns (image, message), matching the two outputs.
+            image_input.change(
+                fn=process_image,
+                inputs=[image_input],
+                outputs=[image_output, status_output]
+            )
+        with gr.Tab("Video"):
+            gr.Markdown("Upload a video file for drowsiness detection")
+            with gr.Row():
+                video_input = gr.Video(label="Input Video")
+                video_output = gr.Video(label="Processed Video")
+            with gr.Row():
+                video_status = gr.Textbox(label="Status")
+            # process_video returns (output_path, message), matching the two outputs.
+            video_input.change(
+                fn=process_video,
+                inputs=[video_input],
+                outputs=[video_output, video_status]
+            )
+# Launch the app
+if __name__ == "__main__":
     demo.launch()