Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import torch | |
| import numpy as np | |
| import cv2 | |
| from PIL import Image | |
| from ultralytics import YOLO | |
| from torchvision.models.detection import fasterrcnn_resnet50_fpn | |
| from transformers import AutoImageProcessor, AutoModelForObjectDetection | |
# -------------------------------------------------
# Device
# -------------------------------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# -------------------------------------------------
# Load Models
# -------------------------------------------------
# YOLOv8 nano checkpoint (downloaded by ultralytics on first run).
yolo = YOLO("yolov8n.pt")

# Faster R-CNN: the `pretrained=` keyword was deprecated in torchvision 0.13
# and removed later; `weights="DEFAULT"` loads the same COCO-trained weights.
frcnn = fasterrcnn_resnet50_fpn(weights="DEFAULT")
frcnn.to(device).eval()

# Deformable DETR from the Hugging Face hub.
# use_fast=False keeps the slow (PIL-based) image processor.
processor = AutoImageProcessor.from_pretrained(
    "SenseTime/deformable-detr",
    use_fast=False,
)
detr = AutoModelForObjectDetection.from_pretrained(
    "SenseTime/deformable-detr"
)
detr.to(device).eval()
| # ------------------------------------------------- | |
| # Utility Functions | |
| # ------------------------------------------------- | |
def compute_iou(box1, box2):
    """Return the Intersection-over-Union of two [x1, y1, x2, y2] boxes.

    A small epsilon in the denominator guards against division by zero
    for degenerate (zero-area) boxes.
    """
    ix1 = max(box1[0], box2[0])
    iy1 = max(box1[1], box2[1])
    ix2 = min(box1[2], box2[2])
    iy2 = min(box1[3], box2[3])
    intersection = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    a1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    a2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = a1 + a2 - intersection
    return intersection / (union + 1e-6)
def draw_boxes(image, detections):
    """Render each detection's box and label onto *image*; return a PIL image.

    Each detection is a dict with a "box" ([x1, y1, x2, y2]) and a "label".
    """
    canvas = np.array(image)
    green = (0, 255, 0)
    for det in detections:
        x1, y1, x2, y2 = (int(v) for v in det["box"])
        cv2.rectangle(canvas, (x1, y1), (x2, y2), green, 2)
        cv2.putText(
            canvas,
            det["label"],
            (x1, y1 - 6),  # text sits just above the top-left corner
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            green,
            1,
        )
    return Image.fromarray(canvas)
| # ------------------------------------------------- | |
| # Model Inference | |
| # ------------------------------------------------- | |
def yolo_detect(image):
    """Run the global YOLOv8 model on *image*.

    Returns a list of {"box": xyxy ndarray, "model": "YOLO"} dicts.
    """
    result = yolo(image)[0]
    return [
        {"box": box.xyxy[0].cpu().numpy(), "model": "YOLO"}
        for box in result.boxes
    ]
def frcnn_detect(image):
    """Run the global Faster R-CNN on *image*; keep scores above 0.6.

    Returns a list of {"box": xyxy ndarray, "model": "FRCNN"} dicts.
    """
    # HWC uint8 -> normalized CHW float batch of one.
    tensor = torch.tensor(np.array(image) / 255.).permute(2, 0, 1).float()
    batch = tensor.unsqueeze(0).to(device)
    with torch.no_grad():
        prediction = frcnn(batch)[0]
    return [
        {"box": box.cpu().numpy(), "model": "FRCNN"}
        for box, score in zip(prediction["boxes"], prediction["scores"])
        if score > 0.6
    ]
def detr_detect(image):
    """Run the global Deformable DETR on *image*; keep scores above 0.7.

    Returns a list of {"box": xyxy ndarray, "model": "DETR"} dicts.
    """
    inputs = processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = detr(**inputs)
    # post-processing wants (height, width); PIL's .size is (width, height).
    target_size = torch.tensor([image.size[::-1]]).to(device)
    detected = processor.post_process_object_detection(
        outputs, target_sizes=target_size, threshold=0.7
    )[0]
    return [
        {"box": box.cpu().numpy(), "model": "DETR"}
        for box in detected["boxes"]
    ]
| # ------------------------------------------------- | |
| # HARD VOTING | |
| # ------------------------------------------------- | |
def hard_vote(detections, vote_thresh=2, iou_thresh=0.5):
    """Keep only boxes that at least *vote_thresh* distinct models agree on.

    Two detections "agree" when they come from different models and overlap
    with IoU >= *iou_thresh*. Agreeing boxes are averaged into one consensus
    box; near-identical consensus boxes (IoU > 0.8) are then collapsed.
    """
    candidates = []
    for det in detections:
        # Cluster: this detection plus every cross-model overlapping one.
        cluster = [det]
        cluster.extend(
            other
            for other in detections
            if other["model"] != det["model"]
            and compute_iou(det["box"], other["box"]) >= iou_thresh
        )
        voters = {member["model"] for member in cluster}
        if len(voters) >= vote_thresh:
            candidates.append({
                "box": np.mean([member["box"] for member in cluster], axis=0),
                "label": f"Ensemble ({len(voters)})",
            })
    # Each agreeing detection produced its own consensus box; deduplicate.
    deduped = []
    for cand in candidates:
        if all(compute_iou(cand["box"], kept["box"]) <= 0.8 for kept in deduped):
            deduped.append(cand)
    return deduped
| # ------------------------------------------------- | |
| # LIVE FRAME FUNCTION | |
| # ------------------------------------------------- | |
def live_detect(frame):
    """Detect objects in one webcam frame via the three-model ensemble.

    Takes an RGB ndarray frame, pools detections from all three models,
    applies IoU-based hard voting, and returns the annotated frame ndarray.
    """
    image = Image.fromarray(frame)
    pooled = []
    for detector in (yolo_detect, frcnn_detect, detr_detect):
        pooled.extend(detector(image))
    consensus = hard_vote(pooled)
    annotated = draw_boxes(image, consensus)
    return np.array(annotated)
| # ------------------------------------------------- | |
| # Gradio Interface (Webcam) | |
| # ------------------------------------------------- | |
# -------------------------------------------------
# Gradio Interface (Webcam)
# -------------------------------------------------
# Gradio 4.x renamed gr.Image's `source=` keyword to `sources=[...]`;
# passing the old keyword raises TypeError at startup, which matches the
# Space's "Runtime error" banner.
demo = gr.Interface(
    fn=live_detect,
    inputs=gr.Image(sources=["webcam"], streaming=True),
    outputs=gr.Image(),
    live=True,
    title="Live Object Detection – Hard Voting Ensemble",
    description=(
        "YOLOv8 + Faster R-CNN + Deformable DETR\n"
        "Browser-based webcam with IoU-based hard voting."
    ),
)

demo.launch()