Spaces:

Hamza011
/

object_detect

Sleeping

App Files Files Community

object_detect / app.py

Hamza011

Update app.py

a5abe0c verified about 1 year ago

raw

history blame

3.7 kB

	import cv2
	import numpy as np

	# Paths to the SSD model files, both relative to the working directory.
	model_path = 'saved_model.pb' # Path to the pre-trained SSD model
	config_path = 'pipeline.config' # Path to the model's configuration file

	# COCO class labels, indexed directly by COCO category id (0 = background).
	# COCO ids run from 1 to 90 with gaps (12, 26, 29, 30, 45, 66, 68, 69, 71, 83);
	# each gap holds a 'none' placeholder so that CLASSES[class_id] stays aligned
	# with the ids the detector reports. Ten entries per row for easy auditing.
	CLASSES = [
	    # 0-9
	    'background', 'person', 'bicycle', 'car', 'motorcycle',
	    'airplane', 'bus', 'train', 'truck', 'boat',
	    # 10-19
	    'traffic light', 'fire hydrant', 'none', 'stop sign', 'parking meter',
	    'bench', 'bird', 'cat', 'dog', 'horse',
	    # 20-29
	    'sheep', 'cow', 'elephant', 'bear', 'zebra',
	    'giraffe', 'none', 'backpack', 'umbrella', 'none',
	    # 30-39
	    'none', 'handbag', 'tie', 'suitcase', 'frisbee',
	    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
	    # 40-49
	    'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
	    'none', 'wine glass', 'cup', 'fork', 'knife',
	    # 50-59
	    'spoon', 'bowl', 'banana', 'apple', 'sandwich',
	    'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
	    # 60-69
	    'donut', 'cake', 'chair', 'couch', 'potted plant',
	    'bed', 'none', 'dining table', 'none', 'none',
	    # 70-79
	    'toilet', 'none', 'tv', 'laptop', 'mouse',
	    'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
	    # 80-89
	    'toaster', 'sink', 'refrigerator', 'none', 'book',
	    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
	    # 90
	    'toothbrush',
	]

	# Initialize the OpenCV DNN network.
	# readNetFromTensorflow takes the binary model first and the text
	# configuration second; the original call passed them in the opposite order.
	# NOTE(review): OpenCV documents these as a frozen-graph .pb and a .pbtxt
	# text graph -- confirm that 'saved_model.pb' / 'pipeline.config' are in a
	# format this loader accepts.
	net = cv2.dnn.readNetFromTensorflow(model_path, config_path)

	# Function to process the video frame and detect objects
	def detect_objects_in_frame(frame, conf_threshold=0.5):
	    """Run the SSD network on one frame and draw the detections onto it.

	    Args:
	        frame: Image array whose first two dimensions are (height, width).
	            It is annotated in place.
	        conf_threshold: Detections are drawn only when their confidence is
	            strictly greater than this value. Defaults to 0.5, the value
	            that was previously hard-coded. The stream handler passes the
	            slider value as this second positional argument.

	    Returns:
	        The same ``frame`` object with a green box and a
	        ``"<label>: <confidence>"`` caption for each accepted detection,
	        or ``None`` unchanged when no frame was supplied.
	    """
	    # Nothing to process when no frame was supplied.
	    if frame is None:
	        return frame

	    # Get the image shape
	    height, width = frame.shape[:2]

	    # Prepare the frame for the model (mean subtraction and resizing)
	    blob = cv2.dnn.blobFromImage(
	        frame, 1.0, (300, 300), (127.5, 127.5, 127.5), swapRB=True, crop=False
	    )

	    # Set the blob as input to the network and run the forward pass
	    net.setInput(blob)
	    detections = net.forward()

	    # Each row read below is laid out as
	    # [_, class_id, confidence, left, top, right, bottom],
	    # with the box corners expressed as fractions of the frame size.
	    for detection in detections[0, 0]:
	        confidence = detection[2]

	        if confidence > conf_threshold:
	            # Get the class index and the bounding box coordinates
	            class_id = int(detection[1])
	            left = int(detection[3] * width)
	            top = int(detection[4] * height)
	            right = int(detection[5] * width)
	            bottom = int(detection[6] * height)

	            # An id outside the label table must not abort the stream
	            # (too large) or silently wrap around (negative).
	            if 0 <= class_id < len(CLASSES):
	                class_name = CLASSES[class_id]
	            else:
	                class_name = f"class {class_id}"

	            # Draw the bounding box and label on the frame
	            label = f"{class_name}: {confidence:.2f}"
	            cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
	            # Keep the caption inside the image when the box touches the top.
	            label_y = max(top - 10, 15)
	            cv2.putText(frame, label, (left, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

	    return frame

	import gradio as gr
	from gradio_webrtc import WebRTC

	# Page styling: cap the size of the video group and centre the column content.
	css = """.my-group {max-width: 600px !important; max-height: 600px !important;}
	.my-column {display: flex !important; justify-content: center !important; align-items: center !important;}"""

	# Build the Gradio UI: a heading, the WebRTC video component and a
	# confidence slider, with each streamed frame routed through
	# detect_objects_in_frame.
	with gr.Blocks(css=css) as demo:
	    # The heading names the detector this app actually runs (an SSD model
	    # loaded through OpenCV DNN); it previously said "YOLOv10".
	    gr.HTML(
	        """
	<h1 style='text-align: center'>
	SSD Object Detection Webcam Stream (Powered by WebRTC ⚡️)
	</h1>
	"""
	    )
	    with gr.Column(elem_classes=["my-column"]):
	        with gr.Group(elem_classes=["my-group"]):
	            image = WebRTC(label="Stream", rtc_configuration=None)
	            conf_threshold = gr.Slider(
	                label="Confidence Threshold",
	                minimum=0.0,
	                maximum=1.0,
	                step=0.05,
	                value=0.30,
	            )

	        # Both the frame and the slider are listed as inputs, so the handler
	        # is given two arguments; the annotated frame is written back to the
	        # same WebRTC component.
	        # NOTE(review): time_limit=10 presumably caps each stream at 10
	        # seconds -- confirm against the Gradio docs.
	        image.stream(
	            fn=detect_objects_in_frame, inputs=[image, conf_threshold], outputs=[image], time_limit=10
	        )

	# Start the web server only when the file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()