Spaces:
Sleeping
Sleeping
| import cv2 | |
| import random | |
| from ultralytics import YOLO | |
| from gtts import gTTS | |
| from datetime import datetime, timedelta | |
| import gradio as gr | |
# Load YOLOv8 model
yolo = YOLO("yolov8n.pt")

# Audio alert settings
# Detection labels that are allowed to trigger a spoken alert.
# NOTE(review): "fire" and "gun" do not appear to be class names of the
# stock COCO-trained yolov8n.pt weights -- confirm they can ever match.
alert_categories = {
    "person",
    "cat",
    "dog",
    "knife",
    "fire",
    "gun",
}
# Maps a label to the time its most recent alert was generated.
last_alert_time = {}
# Minimum time that must pass before the same label is announced again.
alert_cooldown = timedelta(seconds=10)
# Create audio alert as downloadable file
def generate_audio_alert(label, position):
    """Synthesize a spoken warning about a detected object and save it as MP3.

    One of three phrasings is chosen at random, converted to speech with
    gTTS, and written to a timestamped file in the current working directory.

    Args:
        label: Name of the detected object class, e.g. "person".
        position: Side of the frame the object is on, e.g. "left" or "right".

    Returns:
        Path of the MP3 file that was written.
    """
    phrases = [
        f"Be careful, there's a {label} on your {position}.",
        f"Watch out! {label} detected on your {position}.",
        f"Alert! A {label} is on your {position}.",
    ]
    caution_note = random.choice(phrases)

    # Save audio alert as an MP3 file.
    # The timestamp includes microseconds: a single frame can produce several
    # alerts (one per label), and with one-second resolution they all shared
    # one file name and overwrote each other.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    temp_file_path = f"audio_alert_{timestamp}.mp3"
    tts = gTTS(caution_note)
    tts.save(temp_file_path)
    return temp_file_path
# Process a single frame
def process_frame(image, enable_audio):
    """Detect objects in one frame, annotate it, and create any due alerts.

    Every detection is drawn onto ``image`` (in place) as a green box with its
    label. When ``enable_audio`` is true, detections whose label is in
    ``alert_categories`` also produce an audio alert, at most once per
    ``alert_cooldown`` for each label.

    Args:
        image: Frame to analyse; ``image.shape[1]`` is used as its width.
        enable_audio: Whether audio alerts should be generated.

    Returns:
        Tuple ``(image, audio_files)``: the annotated frame and the list of
        alert file paths created for this frame (possibly empty).
    """
    result = yolo(image)[0]
    audio_files = []
    # The frame width does not change between boxes, so compute this once.
    frame_center_x = image.shape[1] // 2

    for box in result.boxes:
        x1, y1, x2, y2 = map(int, box.xyxy[0])
        label = result.names[int(box.cls[0])]

        if enable_audio and label in alert_categories:
            obj_center_x = (x1 + x2) // 2
            position = "left" if obj_center_x < frame_center_x else "right"
            current_time = datetime.now()
            previous_alert = last_alert_time.get(label)
            if (
                previous_alert is None
                or current_time - previous_alert > alert_cooldown
            ):
                audio_files.append(generate_audio_alert(label, position))
                last_alert_time[label] = current_time

        # Draw bounding boxes
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Keep the caption inside the frame when the box touches the top edge;
        # y1 - 10 alone would place the text baseline above row 0.
        text_y = max(y1 - 10, 15)
        cv2.putText(
            image,
            label,
            (x1, text_y),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 255, 0),
            2,
        )

    return image, audio_files
# Gradio interface function
def object_detection_webcam(enable_audio):
    """Stream annotated webcam frames and new audio alerts to Gradio.

    Opens camera device 0 and, for every frame read, converts it from BGR to
    RGB, runs ``process_frame`` on it, and yields the result.

    Args:
        enable_audio: Whether audio alerts should be generated for detections.

    Yields:
        ``(processed_frame, audio_files)`` pairs, in the same order as the
        interface's two outputs (image, then files).

    Raises:
        gr.Error: If the camera cannot be opened or a frame cannot be read.
            A plain ``return "message"`` inside a generator is never
            delivered to the caller, so the message is raised instead.
    """
    cap = cv2.VideoCapture(0)
    try:
        if not cap.isOpened():
            raise gr.Error("Error: Unable to access the camera.")

        while True:
            ret, frame = cap.read()
            if not ret:
                raise gr.Error("Error: Unable to read from camera.")

            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            processed_frame, audio_files = process_frame(frame, enable_audio)
            # One value per output component, positionally.
            yield processed_frame, audio_files
    finally:
        # Runs on errors and when the generator is closed, so the camera is
        # always handed back.
        cap.release()
# Gradio UI
def gradio_app():
    """Assemble the Gradio interface for the webcam object detector.

    The interface has a single checkbox input that toggles audio alerts and
    two outputs: the processed frame and the generated alert files.

    Returns:
        The configured ``gr.Interface`` (not yet launched).
    """
    audio_toggle = gr.Checkbox(label="Enable Audio Alerts", value=False)
    frame_view = gr.Image(label="Processed Frame")
    alert_downloads = gr.File(label="Audio Alerts")

    interface = gr.Interface(
        fn=object_detection_webcam,
        inputs=[audio_toggle],
        outputs=[frame_view, alert_downloads],
        live=True,
    )
    return interface


# Build the interface and start serving it when the script is executed.
gradio_app().launch()