Spaces:
Sleeping
Sleeping
| import cv2 | |
| import numpy as np | |
# Locations of the SSD model files that are handed to the OpenCV DNN loader.
# NOTE(review): cv2.dnn.readNetFromTensorflow expects a frozen-graph .pb file
# and a .pbtxt text graph -- confirm these two files are in that format.
model_path, config_path = (
    'saved_model.pb',   # binary file of the pre-trained SSD model
    'pipeline.config',  # configuration file accompanying the model
)
# Class labels of the COCO dataset, indexed by the class id the detector emits.
# The COCO label map uses ids 0-90 with gaps; every unused id is filled with
# 'none' so that CLASSES[class_id] lines up with the id scheme
# (unused ids: 12, 26, 29, 30, 45, 66, 68, 69, 71, 83).
CLASSES = [
    # ids 0-9
    'background', 'person', 'bicycle', 'car', 'motorcycle',
    'airplane', 'bus', 'train', 'truck', 'boat',
    # ids 10-19
    'traffic light', 'fire hydrant', 'none', 'stop sign', 'parking meter',
    'bench', 'bird', 'cat', 'dog', 'horse',
    # ids 20-29
    'sheep', 'cow', 'elephant', 'bear', 'zebra',
    'giraffe', 'none', 'backpack', 'umbrella', 'none',
    # ids 30-39
    'none', 'handbag', 'tie', 'suitcase', 'frisbee',
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
    # ids 40-49
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
    'none', 'wine glass', 'cup', 'fork', 'knife',
    # ids 50-59
    'spoon', 'bowl', 'banana', 'apple', 'sandwich',
    'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    # ids 60-69
    'donut', 'cake', 'chair', 'couch', 'potted plant',
    'bed', 'none', 'dining table', 'none', 'none',
    # ids 70-79
    'toilet', 'none', 'tv', 'laptop', 'mouse',
    'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
    # ids 80-90
    'toaster', 'sink', 'refrigerator', 'none', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
    'toothbrush',
]
# Initialize the OpenCV DNN network.
# cv2.dnn.readNetFromTensorflow takes the binary model file first and the
# text-graph configuration second, so the paths are passed in that order.
net = cv2.dnn.readNetFromTensorflow(model_path, config_path)
# Function to process the video frame and detect objects
def detect_objects_in_frame(frame, conf_threshold=0.5):
    """Run the network on one frame and draw the detections onto it.

    Args:
        frame: Image array; its first two dimensions are read as
            (height, width). The boxes and labels are drawn on it in place.
        conf_threshold: Detections whose confidence is not strictly greater
            than this value are skipped. Defaults to 0.5 so that callers
            passing only ``frame`` keep the previous behavior.

    Returns:
        The same ``frame`` object, annotated with a green rectangle and a
        "<class>: <confidence>" label for every accepted detection.
    """
    # Get the image shape
    height, width = frame.shape[:2]

    # Prepare the frame for the model (mean subtraction and resizing)
    blob = cv2.dnn.blobFromImage(
        frame, 1.0, (300, 300), (127.5, 127.5, 127.5), swapRB=True, crop=False
    )

    # Feed the blob to the network and run the forward pass
    net.setInput(blob)
    detections = net.forward()

    # Each row of detections[0, 0] is read as
    # [_, class_id, confidence, left, top, right, bottom]; the four box values
    # are scaled by the frame width/height to obtain pixel coordinates.
    for detection in detections[0, 0]:
        confidence = detection[2]
        if not confidence > conf_threshold:
            continue

        class_id = int(detection[1])
        left = int(detection[3] * width)
        top = int(detection[4] * height)
        right = int(detection[5] * width)
        bottom = int(detection[6] * height)

        # Fall back to the numeric id instead of raising IndexError (or
        # silently wrapping on a negative id) when the id is not in CLASSES.
        if 0 <= class_id < len(CLASSES):
            class_name = CLASSES[class_id]
        else:
            class_name = f"id {class_id}"

        # Draw the bounding box and label on the frame
        label = f"{class_name}: {confidence:.2f}"
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
        cv2.putText(
            frame, label, (left, top - 10),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2,
        )

    return frame
| import gradio as gr | |
| from gradio_webrtc import WebRTC | |
# Styling for the stream container: caps its size and centers it in the column.
css = """.my-group {max-width: 600px !important; max-height: 600px !important;}
.my-column {display: flex !important; justify-content: center !important; align-items: center !important;}"""

# Build the page: a title, the WebRTC stream component and a confidence slider.
with gr.Blocks(css=css) as demo:
    # The title names the SSD detector that this script actually loads.
    gr.HTML(
        """
    <h1 style='text-align: center'>
    SSD Object Detection Webcam Stream (Powered by WebRTC ⚡️)
    </h1>
    """
    )
    with gr.Column(elem_classes=["my-column"]):
        with gr.Group(elem_classes=["my-group"]):
            image = WebRTC(label="Stream", rtc_configuration=None)
            conf_threshold = gr.Slider(
                label="Confidence Threshold",
                minimum=0.0,
                maximum=1.0,
                step=0.05,
                value=0.30,
            )

        # The handler is called with the current frame and the slider value,
        # and its return value is sent back to the same stream component.
        # NOTE(review): time_limit is presumably a per-stream limit in
        # seconds -- confirm against the gradio_webrtc documentation.
        image.stream(
            fn=detect_objects_in_frame,
            inputs=[image, conf_threshold],
            outputs=[image],
            time_limit=10,
        )

if __name__ == "__main__":
    demo.launch()