Spaces:

gannushalini2006
/

Object_detection

Runtime error

App Files Community

gannushalini2006 committed 28 days ago

Commit

7f51efa

verified ·

1 Parent(s): 742b670

Update app.py

Browse files

Files changed (1) hide show

app.py +129 -64

app.py CHANGED Viewed

@@ -3,109 +3,174 @@ import torch
 import numpy as np
 import cv2
 from PIL import Image
 from ultralytics import YOLO
 from torchvision.models.detection import fasterrcnn_resnet50_fpn
 from transformers import AutoImageProcessor, AutoModelForObjectDetection
-# -----------------------------
-# Load Models (CPU-friendly)
-# -----------------------------
 yolo = YOLO("yolov8n.pt")
 frcnn = fasterrcnn_resnet50_fpn(pretrained=True)
-frcnn.eval()
-processor = AutoImageProcessor.from_pretrained("SenseTime/deformable-detr")
-detr = AutoModelForObjectDetection.from_pretrained("SenseTime/deformable-detr")
-detr.eval()
-# -----------------------------
-# Utility
-# -----------------------------
-def iou(box1, box2):
     x1, y1 = max(box1[0], box2[0]), max(box1[1], box2[1])
     x2, y2 = min(box1[2], box2[2]), min(box1[3], box2[3])
     inter = max(0, x2 - x1) * max(0, y2 - y1)
     area1 = (box1[2]-box1[0])*(box1[3]-box1[1])
     area2 = (box2[2]-box2[0])*(box2[3]-box2[1])
     return inter / (area1 + area2 - inter + 1e-6)
-def draw(image, detections):
     img = np.array(image)
     for d in detections:
-        x1,y1,x2,y2 = map(int, d["box"])
         cv2.rectangle(img, (x1,y1), (x2,y2), (0,255,0), 2)
-        cv2.putText(img, d["label"], (x1,y1-6),
-                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1)
     return Image.fromarray(img)
-# -----------------------------
-# Model Predictions
-# -----------------------------
-def detect_yolo(img):
-    r = yolo(img)[0]
-    return [{"box": b.xyxy[0].cpu().numpy(), "model": "YOLO", "label": "object"}
-            for b in r.boxes]
-def detect_frcnn(img):
-    t = torch.tensor(np.array(img)/255.).permute(2,0,1).float().unsqueeze(0)
-    o = frcnn(t)[0]
-    return [{"box": b.cpu().numpy(), "model": "FRCNN", "label": "object"}
-            for b,s in zip(o["boxes"], o["scores"]) if s > 0.6]
-def detect_detr(img):
-    inp = processor(images=img, return_tensors="pt")
-    out = detr(**inp)
-    size = torch.tensor([img.size[::-1]])
-    res = processor.post_process_object_detection(out, size, threshold=0.7)[0]
-    return [{"box": b.cpu().numpy(), "model": "DETR", "label": "object"}
-            for b in res["boxes"]]
-# -----------------------------
 # HARD VOTING
-# -----------------------------
-def hard_vote(dets, votes=2, iou_th=0.5):
     final = []
-    for d in dets:
-        agree = [o for o in dets if d["model"] != o["model"]
-                 and iou(d["box"], o["box"]) > iou_th]
-        models = {d["model"]} | {a["model"] for a in agree}
-        if len(models) >= votes:
-            box = np.mean([d["box"]] + [a["box"] for a in agree], axis=0)
-            final.append({"box": box, "label": f"Ensemble ({len(models)})"})
-    return final
-# -----------------------------
 # LIVE FRAME FUNCTION
-# -----------------------------
 def live_detect(frame):
-    img = Image.fromarray(frame)
-    dets = (
-        detect_yolo(img) +
-        detect_frcnn(img) +
-        detect_detr(img)
     )
-    voted = hard_vote(dets)
-    return np.array(draw(img, voted))
-# -----------------------------
-# Gradio LIVE Interface
-# -----------------------------
 demo = gr.Interface(
     fn=live_detect,
     inputs=gr.Image(source="webcam", streaming=True),
     outputs=gr.Image(),
     live=True,
-    title="Live Object Detection (Hard Voting Ensemble)",
-    description="YOLOv8 + Faster R-CNN + Deformable DETR — Live Webcam via Browser"
 )
 demo.launch()

 import numpy as np
 import cv2
 from PIL import Image
 from ultralytics import YOLO
 from torchvision.models.detection import fasterrcnn_resnet50_fpn
 from transformers import AutoImageProcessor, AutoModelForObjectDetection
+# -------------------------------------------------
+# Device
+# -------------------------------------------------
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# -------------------------------------------------
+# Load Models
+# -------------------------------------------------
+# YOLOv8
 yolo = YOLO("yolov8n.pt")
+# Faster R-CNN
 frcnn = fasterrcnn_resnet50_fpn(pretrained=True)
+frcnn.to(device).eval()
+# Deformable DETR
+processor = AutoImageProcessor.from_pretrained(
+    "SenseTime/deformable-detr",
+    use_fast=False
+)
+detr = AutoModelForObjectDetection.from_pretrained(
+    "SenseTime/deformable-detr"
+)
+detr.to(device).eval()
+# -------------------------------------------------
+# Utility Functions
+# -------------------------------------------------
+def compute_iou(box1, box2):
     x1, y1 = max(box1[0], box2[0]), max(box1[1], box2[1])
     x2, y2 = min(box1[2], box2[2]), min(box1[3], box2[3])
     inter = max(0, x2 - x1) * max(0, y2 - y1)
     area1 = (box1[2]-box1[0])*(box1[3]-box1[1])
     area2 = (box2[2]-box2[0])*(box2[3]-box2[1])
     return inter / (area1 + area2 - inter + 1e-6)
+def draw_boxes(image, detections):
     img = np.array(image)
     for d in detections:
+        x1, y1, x2, y2 = map(int, d["box"])
+        label = d["label"]
         cv2.rectangle(img, (x1,y1), (x2,y2), (0,255,0), 2)
+        cv2.putText(
+            img, label, (x1, y1-6),
+            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1
+        )
     return Image.fromarray(img)
+# -------------------------------------------------
+# Model Inference
+# -------------------------------------------------
+def yolo_detect(image):
+    results = yolo(image)[0]
+    dets = []
+    for b in results.boxes:
+        dets.append({
+            "box": b.xyxy[0].cpu().numpy(),
+            "model": "YOLO"
+        })
+    return dets
+def frcnn_detect(image):
+    img = torch.tensor(np.array(image)/255.).permute(2,0,1).float()
+    img = img.unsqueeze(0).to(device)
+    with torch.no_grad():
+        out = frcnn(img)[0]
+    dets = []
+    for box, score in zip(out["boxes"], out["scores"]):
+        if score > 0.6:
+            dets.append({
+                "box": box.cpu().numpy(),
+                "model": "FRCNN"
+            })
+    return dets
+def detr_detect(image):
+    inputs = processor(images=image, return_tensors="pt").to(device)
+    with torch.no_grad():
+        outputs = detr(**inputs)
+    size = torch.tensor([image.size[::-1]]).to(device)
+    results = processor.post_process_object_detection(
+        outputs, target_sizes=size, threshold=0.7
+    )[0]
+    dets = []
+    for box in results["boxes"]:
+        dets.append({
+            "box": box.cpu().numpy(),
+            "model": "DETR"
+        })
+    return dets
+# -------------------------------------------------
 # HARD VOTING
+# -------------------------------------------------
+def hard_vote(detections, vote_thresh=2, iou_thresh=0.5):
     final = []
+    for d in detections:
+        votes = [d]
+        for o in detections:
+            if d["model"] != o["model"]:
+                if compute_iou(d["box"], o["box"]) >= iou_thresh:
+                    votes.append(o)
+        models = set(v["model"] for v in votes)
+        if len(models) >= vote_thresh:
+            avg_box = np.mean([v["box"] for v in votes], axis=0)
+            final.append({
+                "box": avg_box,
+                "label": f"Ensemble ({len(models)})"
+            })
+    # remove duplicates
+    unique = []
+    for d in final:
+        if not any(compute_iou(d["box"], u["box"]) > 0.8 for u in unique):
+            unique.append(d)
+    return unique
+# -------------------------------------------------
 # LIVE FRAME FUNCTION
+# -------------------------------------------------
 def live_detect(frame):
+    image = Image.fromarray(frame)
+    detections = (
+        yolo_detect(image) +
+        frcnn_detect(image) +
+        detr_detect(image)
     )
+    voted = hard_vote(detections)
+    output = draw_boxes(image, voted)
+    return np.array(output)
+# -------------------------------------------------
+# Gradio Interface (Webcam)
+# -------------------------------------------------
 demo = gr.Interface(
     fn=live_detect,
     inputs=gr.Image(source="webcam", streaming=True),
     outputs=gr.Image(),
     live=True,
+    title="Live Object Detection – Hard Voting Ensemble",
+    description=(
+        "YOLOv8 + Faster R-CNN + Deformable DETR\n"
+        "Browser-based webcam with IoU-based hard voting."
+    )
 )
 demo.launch()