Spaces:

KPrashanth
/

Yolo_detect

Sleeping

App Files Files Community

KPrashanth committed on Feb 22

Commit

146b696

verified ·

1 Parent(s): 44d73d2

Upload app.py

Browse files

Files changed (1) hide show

app.py +103 -140

app.py CHANGED Viewed

@@ -1,181 +1,144 @@
-import os
 import cv2
 import numpy as np
 import gradio as gr
 from ultralytics import YOLO
-# ----------------------------
-# Config
-# ----------------------------
-MODEL_NAME = os.getenv("YOLO_MODEL", "yolov8n.pt")
-CLASS_OF_INTEREST = "person"
-# Danger zone: top-left, bottom-right (x, y)
-DANGER_ZONE = ((100, 100), (400, 400))
-# Inference config
-CONF_THRES = 0.35
-IMG_SIZE = 640
-# ----------------------------
 # Load model once (global)
-# ----------------------------
 model = YOLO(MODEL_NAME)
-# Build class-name -> id mapping once (YOLOv8 COCO)
-# For yolov8n.pt, names is dict {id: name}
-NAMES = model.names
-PERSON_CLASS_ID = None
-for k, v in NAMES.items():
-    if v == CLASS_OF_INTEREST:
-        PERSON_CLASS_ID = int(k)
-        break
-if PERSON_CLASS_ID is None:
-    raise RuntimeError("Could not find 'person' class in model.names")
-# ----------------------------
-# Helpers
-# ----------------------------
-def overlaps_zone(box_xyxy, zone):
-    """True if box overlaps danger zone (partial overlap)."""
-    x1, y1, x2, y2 = box_xyxy
     (zx1, zy1), (zx2, zy2) = zone
     overlap_x = (x1 < zx2) and (x2 > zx1)
     overlap_y = (y1 < zy2) and (y2 > zy1)
     return overlap_x and overlap_y
-def make_beep(sr=22050, freq=880, duration=0.25):
-    """Return a short beep waveform for browser playback."""
-    t = np.linspace(0, duration, int(sr * duration), endpoint=False)
-    wave = 0.2 * np.sin(2 * np.pi * freq * t)  # low volume
-    return (sr, wave.astype(np.float32))
-BEEP_AUDIO = make_beep()
-# ----------------------------
-# Frame processor
-# ----------------------------
-def process_frame(frame, zone_x1, zone_y1, zone_x2, zone_y2, conf_thres):
     """
-    frame: numpy array RGB from gradio
-    returns:
-      - annotated RGB frame
-      - grayscale RGB frame
-      - infrared frame (RGB)
-      - beep audio tuple or None
-      - status text
     """
     if frame is None:
-        return None, None, None, None, "No frame"
-    # Gradio gives RGB; OpenCV prefers BGR for drawing
-    rgb = frame
-    bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
-    zone = ((int(zone_x1), int(zone_y1)), (int(zone_x2), int(zone_y2)))
-    # Derived feeds
-    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
-    gray_bgr = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
-    infrared = cv2.applyColorMap(gray, cv2.COLORMAP_JET)
-    # Draw danger zone
-    for img in (bgr, gray_bgr, infrared):
-        cv2.rectangle(img, zone[0], zone[1], (0, 0, 255), 2)
-    # YOLO inference (stream=False for single image)
-    # verbose=False keeps logs clean
-    results = model.predict(
-        source=bgr,
-        imgsz=IMG_SIZE,
-        conf=float(conf_thres),
-        verbose=False
-    )
-    alert = False
-    det_count = 0
-    r = results[0]
-    if r.boxes is not None and len(r.boxes) > 0:
-        boxes = r.boxes.xyxy.cpu().numpy().astype(int)
-        cls_ids = r.boxes.cls.cpu().numpy().astype(int)
-        confs = r.boxes.conf.cpu().numpy()
-        for (x1, y1, x2, y2), cid, c in zip(boxes, cls_ids, confs):
-            if cid != PERSON_CLASS_ID:
-                continue
-            det_count += 1
-            label = f"person: {c:.2f}"
-            # draw bbox
-            for img in (bgr, gray_bgr, infrared):
-                cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)
-                cv2.putText(img, label, (x1, max(15, y1 - 8)),
-                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
-            if overlaps_zone((x1, y1, x2, y2), zone):
-                alert = True
-    if alert:
-        for img in (bgr, gray_bgr, infrared):
-            cv2.putText(img, "ALERT: Person in danger zone", (20, 45),
-                        cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 3)
-        status = f"🚨 ALERT! persons detected: {det_count}"
-        beep = BEEP_AUDIO
     else:
-        status = f"✅ OK (persons detected: {det_count})"
-        beep = None
-    # Convert back to RGB for gradio display
     out_rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
-    out_gray = cv2.cvtColor(gray_bgr, cv2.COLOR_BGR2RGB)
-    out_infra = cv2.cvtColor(infrared, cv2.COLOR_BGR2RGB)
-    return out_rgb, out_gray, out_infra, beep, status
-# ----------------------------
-# Gradio UI
-# ----------------------------
-with gr.Blocks(title="YOLOv8 Danger Zone Demo") as demo:
     gr.Markdown(
-        """
-# YOLOv8 Danger Zone Detection (Demo)
-- Uses browser webcam input (works on Hugging Face Spaces)
-- Detects **person** and triggers **alert** if they overlap the danger zone
-"""
     )
     with gr.Row():
-        cam = gr.Image(sources=["webcam"],streaming=True,type="numpy",label="Webcam (Input)")
         with gr.Column():
-            zone_x1 = gr.Slider(0, 1280, value=DANGER_ZONE[0][0], step=1, label="Zone x1")
-            zone_y1 = gr.Slider(0, 720, value=DANGER_ZONE[0][1], step=1, label="Zone y1")
-            zone_x2 = gr.Slider(0, 1280, value=DANGER_ZONE[1][0], step=1, label="Zone x2")
-            zone_y2 = gr.Slider(0, 720, value=DANGER_ZONE[1][1], step=1, label="Zone y2")
-            conf = gr.Slider(0.05, 0.90, value=CONF_THRES, step=0.01, label="Confidence Threshold")
-    with gr.Row():
-        out1 = gr.Image(type="numpy", label="Color (Annotated)")
-        out2 = gr.Image(type="numpy", label="Grayscale (Annotated)")
-        out3 = gr.Image(type="numpy", label="Infrared (Annotated)")
-    with gr.Row():
-        alert_audio = gr.Audio(label="Alert Beep (plays when triggered)", autoplay=True)
-        status = gr.Textbox(label="Status", interactive=False)
     cam.stream(
         fn=process_frame,
-        inputs=[cam, zone_x1, zone_y1, zone_x2, zone_y2, conf],
-        outputs=[out1, out2, out3, alert_audio, status],
-        show_progress=False
     )
-demo.queue().launch()

 import cv2
 import numpy as np
 import gradio as gr
 from ultralytics import YOLO
# -------------------------
# Model configuration
# -------------------------
# Weights file handed to ultralytics.YOLO.
MODEL_NAME = "yolov8n.pt"
# Only detections whose class name equals this string are drawn and counted.
CLASS_OF_INTEREST = "person"

# Load model once (global) so every processed frame reuses the same instance.
model = YOLO(MODEL_NAME)
def is_in_danger_zone(box, zone):
    """Return True when a bounding box overlaps the danger zone.

    Args:
        box: ``(x1, y1, x2, y2)`` corners of the detection.
        zone: ``((zx1, zy1), (zx2, zy2))`` corners of the danger zone.

    Returns:
        True if the two rectangles overlap on both axes (any partial
        overlap counts). Rectangles that only touch along an edge or a
        corner do not overlap, because the comparisons are strict.

    Either rectangle may be given with its corners in any order; the
    coordinates are normalised to (min, max) per axis before comparing.
    """
    x1, y1, x2, y2 = box
    (zx1, zy1), (zx2, zy2) = zone

    # Normalise corner order so a reversed rectangle is not silently missed.
    box_left, box_right = min(x1, x2), max(x1, x2)
    box_top, box_bottom = min(y1, y2), max(y1, y2)
    zone_left, zone_right = min(zx1, zx2), max(zx1, zx2)
    zone_top, zone_bottom = min(zy1, zy2), max(zy1, zy2)

    overlap_x = (box_left < zone_right) and (box_right > zone_left)
    overlap_y = (box_top < zone_bottom) and (box_bottom > zone_top)
    # bool() keeps the return type a plain Python bool even for NumPy scalars.
    return bool(overlap_x and overlap_y)
+def process_frame(frame, zx1, zy1, zx2, zy2, conf_thres):
     """
+    frame: numpy array (H, W, 3) from Gradio webcam (RGB)
+    returns: annotated frame (RGB), status markdown
     """
     if frame is None:
+        return None, "Waiting for webcam input…"
+    # Gradio gives RGB; OpenCV drawing expects BGR
+    bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+    h, w = bgr.shape[:2]
+    # Clamp and fix zone coordinates
+    zx1 = int(np.clip(zx1, 0, w - 1))
+    zx2 = int(np.clip(zx2, 0, w - 1))
+    zy1 = int(np.clip(zy1, 0, h - 1))
+    zy2 = int(np.clip(zy2, 0, h - 1))
+    if zx2 < zx1:
+        zx1, zx2 = zx2, zx1
+    if zy2 < zy1:
+        zy1, zy2 = zy2, zy1
+    danger_zone = ((zx1, zy1), (zx2, zy2))
+    # Draw danger zone
+    cv2.rectangle(bgr, danger_zone[0], danger_zone[1], (0, 0, 255), 2)
+    # Run YOLO (on original RGB frame or BGR — ultralytics handles numpy arrays)
+    results = model.predict(source=frame, conf=float(conf_thres), verbose=False)
+    alert_triggered = False
+    persons_in_zone = 0
+    persons_total = 0
+    for r in results:
+        names = r.names
+        if r.boxes is None:
+            continue
+        boxes_xyxy = r.boxes.xyxy.cpu().numpy() if hasattr(r.boxes.xyxy, "cpu") else np.array(r.boxes.xyxy)
+        cls_ids = r.boxes.cls.cpu().numpy() if hasattr(r.boxes.cls, "cpu") else np.array(r.boxes.cls)
+        confs = r.boxes.conf.cpu().numpy() if hasattr(r.boxes.conf, "cpu") else np.array(r.boxes.conf)
+        for (x1, y1, x2, y2), cls_id, cf in zip(boxes_xyxy, cls_ids, confs):
+            class_name = names[int(cls_id)]
+            if class_name != CLASS_OF_INTEREST:
+                continue
+            persons_total += 1
+            x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])
+            # Draw person bbox
+            cv2.rectangle(bgr, (x1, y1), (x2, y2), (255, 0, 0), 2)
+            label = f"{class_name}: {float(cf):.2f}"
+            cv2.putText(bgr, label, (x1, max(20, y1 - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
+            # Zone overlap check
+            if is_in_danger_zone((x1, y1, x2, y2), danger_zone):
+                alert_triggered = True
+                persons_in_zone += 1
+    if alert_triggered:
+        cv2.putText(
+            bgr,
+            f"ALERT! {persons_in_zone} person(s) in danger zone",
+            (20, 40),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            1.0,
+            (0, 0, 255),
+            3
+        )
+        status = f"## 🔴 ALERT\n**{persons_in_zone}** person(s) inside danger zone.\n\nTotal persons detected: **{persons_total}**"
     else:
+        status = f"## ✅ SAFE\nNo person inside danger zone.\n\nTotal persons detected: **{persons_total}**"
+    # Convert back to RGB for Gradio output
     out_rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
+    return out_rgb, status
# Gradio UI: webcam input on the left, annotated output and status on the
# right, zone and confidence sliders underneath.
with gr.Blocks(title="YOLOv8 Danger Zone (Webcam)") as demo:
    gr.Markdown(
        "# YOLOv8 Danger Zone Detection (Webcam)\n"
        "Use your webcam, define a rectangular danger zone, and detect if any **person** enters it.\n\n"
        "**Note:** On Hugging Face Spaces, server-side audio (pygame) isn’t reliable. We show a clear on-screen alert instead."
    )
    with gr.Row():
        with gr.Column():
            cam = gr.Image(
                label="Webcam Input",
                sources=["webcam"],
                type="numpy"
            )
        with gr.Column():
            out = gr.Image(label="Annotated Output", type="numpy")
            status_md = gr.Markdown("Waiting for webcam input…")
    with gr.Accordion("Danger Zone Controls", open=True):
        # Slider ranges are fixed at 1280x720; process_frame clamps the
        # values to the actual frame size.
        with gr.Row():
            zx1 = gr.Slider(0, 1280, value=100, step=1, label="Zone X1 (left)")
            zy1 = gr.Slider(0, 720, value=100, step=1, label="Zone Y1 (top)")
        with gr.Row():
            zx2 = gr.Slider(0, 1280, value=400, step=1, label="Zone X2 (right)")
            zy2 = gr.Slider(0, 720, value=400, step=1, label="Zone Y2 (bottom)")
        conf = gr.Slider(0.1, 0.9, value=0.35, step=0.05, label="Confidence Threshold")
    # Stream webcam frames to backend (Gradio 5 streaming)
    cam.stream(
        fn=process_frame,
        inputs=[cam, zx1, zy1, zx2, zy2, conf],
        outputs=[out, status_md],
        stream_every=0.1  # approx 10 fps snapshots (depends on device/network)
    )
demo.queue().launch()