Spaces:

Aumkeshchy2003
/

Object_Detection

Running

App Files Files Community

Aumkeshchy2003 committed on Feb 27, 2025

Commit

3100b46

verified ·

1 Parent(s): a228709

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -118

app.py CHANGED Viewed

@@ -1,131 +1,71 @@
 import torch
 import numpy as np
 import gradio as gr
-import cv2
-import time
-import os
-from pathlib import Path
-# Create cache directory for models
-os.makedirs("models", exist_ok=True)
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-print(f"Using device: {device}")
-# Load YOLOv5n model (corrected from original)
-model_path = Path("models/yolov5n.pt")
-if model_path.exists():
-    print(f"Loading model from cache: {model_path}")
-    model = torch.hub.load("ultralytics/yolov5", "yolov5n", pretrained=True,
-                          source="local", path=str(model_path)).to(device)
-else:
-    print("Downloading YOLOv5n model and caching...")
-    model = torch.hub.load("ultralytics/yolov5", "yolov5n", pretrained=True).to(device)
-    torch.save(model.state_dict(), model_path)
-# Model configurations
-model.conf = 0.6
-model.iou = 0.45
-model.classes = None
-# Optimizations
-if device.type == "cuda":
-    model.half()
-    torch.backends.cudnn.benchmark = True
-else:
-    torch.set_num_threads(os.cpu_count())
-model.eval()
-np.random.seed(42)
-colors = np.random.uniform(0, 255, size=(len(model.names), 3))
-total_inference_time = 0
-inference_count = 0
 def detect_objects(image):
-    global total_inference_time, inference_count
-    if image is None:
-        return None
-    # Convert RGB to BGR for OpenCV operations
-    image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
-    output_image = image_bgr.copy()
-    start_time = time.time()
-    # Convert to RGB for model inference
-    img_rgb = cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB)
-    with torch.no_grad():
-        results = model(img_rgb, size=320)  # Reduced input size for speed
-    inference_time = time.time() - start_time
-    total_inference_time += inference_time
-    inference_count += 1
-    avg_inference_time = total_inference_time / inference_count
-    detections = results.pred[0].cpu().numpy()
-    for *xyxy, conf, cls in detections:
-        x1, y1, x2, y2 = map(int, xyxy)
-        class_id = int(cls)
-        color = colors[class_id].tolist()
-        # Draw bounding boxes
-        cv2.rectangle(output_image, (x1, y1), (x2, y2), color, 2, lineType=cv2.LINE_AA)
-        # Draw labels
-        label = f"{model.names[class_id]} {conf:.2f}"
-        (w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
-        cv2.rectangle(output_image, (x1, y1 - 20), (x1 + w, y1), color, -1)
-        cv2.putText(output_image, label, (x1, y1 - 5),
-                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1, lineType=cv2.LINE_AA)
-    # Convert back to RGB for Gradio
-    output_image_rgb = cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB)
-    # Draw performance metrics
-    fps = 1 / inference_time
-    cv2.putText(output_image_rgb, f"FPS: {fps:.1f}", (10, 30),
-                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2, lineType=cv2.LINE_AA)
-    cv2.putText(output_image_rgb, f"Avg FPS: {1/avg_inference_time:.1f}", (10, 60),
-                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2, lineType=cv2.LINE_AA)
-    return output_image_rgb
-# Example images
-example_images = ["spring_street_after.jpg", "pexels-hikaique-109919.jpg"]
-os.makedirs("examples", exist_ok=True)
-with gr.Blocks(title="Real-time YOLOv5 Object Detection") as demo:
-    gr.Markdown("""
-    # Real-time YOLOv5 Object Detection
-    - Real-time webcam detection (30+ FPS on GPU)
-    - Image upload capability
-    - Performance optimized with half-precision and CUDA acceleration
-    """)
-    with gr.Tab("🎥 Real-time Webcam"):
-        with gr.Row():
-            webcam = gr.Image(source="webcam", streaming=True, label="Live Webcam Feed")
-            live_output = gr.Image(label="Detection Results")
-        webcam.stream(fn=detect_objects, inputs=webcam, outputs=live_output)
-    with gr.Tab("📸 Image Upload"):
-        with gr.Row():
-            with gr.Column():
-                input_image = gr.Image(type="numpy", label="Input Image")
-                gr.Examples(examples=example_images, inputs=input_image)
-                with gr.Row():
-                    submit_btn = gr.Button("Detect Objects", variant="primary")
-                    clear_btn = gr.Button("Clear")
-            with gr.Column():
-                output_image = gr.Image(type="numpy", label="Processed Image")
-        submit_btn.click(fn=detect_objects, inputs=input_image, outputs=output_image)
-        clear_btn.click(lambda: (None, None), outputs=[input_image, output_image])
-demo.launch()

+import cv2
 import torch
 import numpy as np
 import gradio as gr
+from ultralytics import YOLO
+import threading
+# Load the YOLOv5s weights once at import time and move the model to the GPU
+# when CUDA is available, otherwise keep it on the CPU.
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+model = YOLO("yolov5s.pt").to(device)
 def detect_objects(image):
+    """Detect objects in an uploaded image and draw them on a copy.
+
+    Args:
+        image: Image array supplied by the Gradio image input, or None when
+            the button is clicked without an upload.
+
+    Returns:
+        A copy of the image with a green box and a "<class> <confidence>"
+        label per detection, or None when no image was supplied.
+    """
+    if image is None:
+        return None
+    # NOTE(review): Gradio numpy images are presumably RGB while the model may
+    # expect BGR for array input -- confirm the channel order.
+    results = model(image)
+    detections = results[0].boxes.data.cpu().numpy()  # rows: x1, y1, x2, y2, conf, cls
+    # Draw on a copy so the caller's array is left untouched.
+    annotated = image.copy()
+    for box in detections:
+        x1, y1, x2, y2 = map(int, box[:4])
+        # Keep the confidence as a float: casting it to int truncated every
+        # score below 1.0 to 0, so the label always read "0.00".
+        conf = float(box[4])
+        cls = int(box[5])
+        label = f"{model.names[cls]} {conf:.2f}"
+        cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 255, 0), 2)
+        cv2.putText(annotated, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+    return annotated
+# Real-time webcam processing: shared state for the capture thread.
+cap = cv2.VideoCapture(0)  # Capture from webcam (device index 0)
+frame = None  # Latest annotated frame; None until the first frame is processed
+lock = threading.Lock()  # Guards reads and writes of `frame` across threads
+def process_webcam():
+    """Continuously read webcam frames, annotate detections, and publish them.
+
+    Runs forever, so it is meant to be the target of a background thread.
+    Each annotated frame replaces the module-level `frame` under `lock`.
+    """
+    import time  # local import: only needed for the retry back-off below
+
+    global frame
+    while True:
+        ret, img = cap.read()
+        if not ret:
+            # No frame available (e.g. no camera attached). Back off briefly
+            # instead of spinning the CPU in a tight retry loop.
+            time.sleep(0.1)
+            continue
+        results = model(img)
+        detections = results[0].boxes.data.cpu().numpy()  # rows: x1, y1, x2, y2, conf, cls
+        for box in detections:
+            x1, y1, x2, y2 = map(int, box[:4])
+            # Keep the confidence as a float: casting it to int truncated every
+            # score below 1.0 to 0, so the label always read "0.00".
+            conf = float(box[4])
+            cls = int(box[5])
+            label = f"{model.names[cls]} {conf:.2f}"
+            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
+            cv2.putText(img, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+        # Publish the finished frame atomically with respect to readers.
+        with lock:
+            frame = img
+# Start the webcam thread at import time; daemon=True lets the process exit
+# without waiting for the thread's endless loop.
+threading.Thread(target=process_webcam, daemon=True).start()
+def get_webcam_frame():
+    """Return the newest annotated webcam frame, or a black 480x640 image if none exists yet."""
+    with lock:
+        if frame is None:
+            # Nothing captured so far: hand back an all-zero placeholder.
+            return np.zeros((480, 640, 3), dtype=np.uint8)
+        return frame
+# Gradio UI: one tab for the live webcam feed and one for single-image upload.
+demo = gr.Blocks()
+with demo:
+    gr.Markdown("# YOLOv5 Real-Time Object Detection")
+    with gr.Tabs():
+        with gr.Tab("Real-Time Webcam"):
+            # NOTE(review): get_webcam_frame returns a numpy frame, while
+            # gr.Video normally takes a video file path -- confirm this
+            # component renders the feed as intended.
+            gr.Video(get_webcam_frame, streaming=True)
+        with gr.Tab("Upload Image"):
+            image_input = gr.Image(type="numpy")
+            image_output = gr.Image()
+            image_button = gr.Button("Detect Objects")
+            # Run detection on the uploaded image and show the annotated result.
+            image_button.click(detect_objects, inputs=image_input, outputs=image_output)
+demo.launch()