gannushalini2006 committed on
Commit
7f51efa
·
verified ·
1 Parent(s): 742b670

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +129 -64
app.py CHANGED
@@ -3,109 +3,174 @@ import torch
3
  import numpy as np
4
  import cv2
5
  from PIL import Image
 
6
  from ultralytics import YOLO
7
  from torchvision.models.detection import fasterrcnn_resnet50_fpn
8
  from transformers import AutoImageProcessor, AutoModelForObjectDetection
9
 
10
- # -----------------------------
11
- # Load Models (CPU-friendly)
12
- # -----------------------------
 
 
 
 
 
13
 
 
14
# YOLOv8 detector.
yolo = YOLO("yolov8n.pt")

# Faster R-CNN. `weights="DEFAULT"` is the supported replacement for the
# deprecated `pretrained=True` flag (see the torchvision model docs).
frcnn = fasterrcnn_resnet50_fpn(weights="DEFAULT")
frcnn.eval()

# Deformable DETR and its matching image processor.
processor = AutoImageProcessor.from_pretrained("SenseTime/deformable-detr")
detr = AutoModelForObjectDetection.from_pretrained("SenseTime/deformable-detr")
detr.eval()
 
 
 
 
 
 
22
 
23
- # -----------------------------
24
- # Utility
25
- # -----------------------------
26
 
27
def iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Each box is indexable as ``(x1, y1, x2, y2)``.  The result is a plain
    ``float``; it is ``0.0`` when the boxes do not overlap or when the
    union area is not positive (degenerate or malformed boxes).
    """
    left = max(box1[0], box2[0])
    top = max(box1[1], box2[1])
    right = min(box1[2], box2[2])
    bottom = min(box1[3], box2[3])

    inter = max(0, right - left) * max(0, bottom - top)
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = area1 + area2 - inter

    # Guard the division explicitly instead of adding an epsilon to the
    # denominator: the epsilon skews every result slightly downwards, so
    # identical boxes never reach 1.0 and exact-threshold overlaps are missed.
    if union <= 0:
        return 0.0
    return float(inter / union)
34
 
35
def draw(image, detections):
    """Draw each detection's box and label onto a copy of *image*.

    *detections* is an iterable of dicts with a ``"box"`` entry
    (``x1, y1, x2, y2``) and a ``"label"`` string.  Returns a new PIL image.
    """
    canvas = np.array(image)
    for det in detections:
        x1, y1, x2, y2 = map(int, det["box"])
        cv2.rectangle(canvas, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # cv2.putText anchors text at its bottom-left corner.  Placing it at
        # y1 - 6 pushes the label off the image for boxes near the top edge,
        # so fall back to just inside the box in that case.
        text_y = y1 - 6 if y1 - 6 >= 10 else y1 + 14
        text_x = max(x1, 0)
        cv2.putText(
            canvas, det["label"], (text_x, text_y),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1,
        )
    return Image.fromarray(canvas)
43
 
44
- # -----------------------------
45
- # Model Predictions
46
- # -----------------------------
47
-
48
def detect_yolo(img):
    """Run the YOLO model on *img* and return one dict per detected box.

    Each dict holds the box as a NumPy ``xyxy`` array, the source model tag
    ``"YOLO"`` and a generic ``"object"`` label.
    """
    first_result = yolo(img)[0]
    found = []
    for det in first_result.boxes:
        coords = det.xyxy[0].cpu().numpy()
        found.append({"box": coords, "model": "YOLO", "label": "object"})
    return found
52
-
53
def detect_frcnn(img):
    """Run Faster R-CNN on *img* and return boxes scoring above 0.6.

    Each returned dict holds the box as a NumPy array, the source model tag
    ``"FRCNN"`` and a generic ``"object"`` label.
    """
    # Scale pixels to [0, 1], move channels first and add a batch dimension.
    # NOTE(review): assumes img converts to an (H, W, 3) array - confirm.
    batch = torch.tensor(np.array(img) / 255.).permute(2, 0, 1).float().unsqueeze(0)

    # Inference only: without no_grad the outputs carry autograd state and
    # the .numpy() conversion below raises.
    with torch.no_grad():
        out = frcnn(batch)[0]

    return [
        {"box": box.cpu().numpy(), "model": "FRCNN", "label": "object"}
        for box, score in zip(out["boxes"], out["scores"])
        if score > 0.6
    ]
58
-
59
def detect_detr(img):
    """Run Deformable DETR on *img* and return boxes scoring above 0.7.

    Each returned dict holds the box as a NumPy array, the source model tag
    ``"DETR"`` and a generic ``"object"`` label.
    """
    inputs = processor(images=img, return_tensors="pt")

    # Inference only: avoids building an autograd graph and lets the
    # resulting boxes be converted with .numpy().
    with torch.no_grad():
        outputs = detr(**inputs)

    # Reverse img.size so boxes are rescaled to the original image.
    # NOTE(review): assumes img is a PIL image whose size is (width, height).
    target_sizes = torch.tensor([img.size[::-1]])

    # target_sizes must be passed by keyword: the second positional
    # parameter of post_process_object_detection is `threshold`, so passing
    # the sizes positionally collides with threshold=0.7.
    results = processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=0.7
    )[0]

    return [
        {"box": box.cpu().numpy(), "model": "DETR", "label": "object"}
        for box in results["boxes"]
    ]
66
-
67
- # -----------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  # HARD VOTING
69
- # -----------------------------
70
 
71
def hard_vote(dets, votes=2, iou_th=0.5, dedup_th=0.8):
    """Keep detections that at least *votes* different models agree on.

    For every detection, the detections from *other* models whose IoU with
    it exceeds *iou_th* count as agreeing.  When the number of distinct
    models reaches *votes*, the averaged box is emitted with the label
    ``"Ensemble (<n models>)"``.

    Every member of an agreeing cluster produces its own averaged box, so
    near-identical results are collapsed afterwards: a box is dropped when
    its IoU with an already kept box exceeds *dedup_th*.
    """
    candidates = []
    for det in dets:
        agree = [
            other for other in dets
            if det["model"] != other["model"]
            and iou(det["box"], other["box"]) > iou_th
        ]
        models = {det["model"]} | {a["model"] for a in agree}
        if len(models) < votes:
            continue
        box = np.mean([det["box"]] + [a["box"] for a in agree], axis=0)
        candidates.append({"box": box, "label": f"Ensemble ({len(models)})"})

    # Collapse the per-member copies of each cluster into a single box.
    final = []
    for cand in candidates:
        if not any(iou(cand["box"], kept["box"]) > dedup_th for kept in final):
            final.append(cand)
    return final
81
-
82
- # -----------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  # LIVE FRAME FUNCTION
84
- # -----------------------------
85
 
86
def live_detect(frame):
    """Run all three detectors on one webcam frame and return the annotated frame.

    *frame* is the image array handed over by the Gradio image input.
    Returns a NumPy array with the ensemble boxes drawn on it, or ``None``
    when no frame was supplied.
    """
    # The callback can fire before the webcam has delivered a frame;
    # Image.fromarray(None) would raise in that case.
    if frame is None:
        return None

    img = Image.fromarray(frame)
    dets = detect_yolo(img) + detect_frcnn(img) + detect_detr(img)
    voted = hard_vote(dets)
    return np.array(draw(img, voted))
97
 
98
- # -----------------------------
99
- # Gradio LIVE Interface
100
- # -----------------------------
 
 
101
 
102
# Browser webcam UI: every streamed frame is passed through live_detect.
# NOTE(review): `source="webcam"` is the Gradio 3 spelling; newer Gradio
# releases expect `sources=[...]` - confirm against the pinned version.
demo = gr.Interface(
    title="Live Object Detection (Hard Voting Ensemble)",
    description="YOLOv8 + Faster R-CNN + Deformable DETR — Live Webcam via Browser",
    fn=live_detect,
    inputs=gr.Image(source="webcam", streaming=True),
    outputs=gr.Image(),
    live=True,
)

demo.launch()
 
3
  import numpy as np
4
  import cv2
5
  from PIL import Image
6
+
7
  from ultralytics import YOLO
8
  from torchvision.models.detection import fasterrcnn_resnet50_fpn
9
  from transformers import AutoImageProcessor, AutoModelForObjectDetection
10
 
11
+ # -------------------------------------------------
12
+ # Device
13
+ # -------------------------------------------------
14
# Prefer the GPU when one is visible, otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# -------------------------------------------------
# Load Models
# -------------------------------------------------

# YOLOv8
yolo = YOLO("yolov8n.pt")

# Faster R-CNN. `weights="DEFAULT"` is the supported replacement for the
# deprecated `pretrained=True` flag (see the torchvision model docs).
frcnn = fasterrcnn_resnet50_fpn(weights="DEFAULT")
frcnn.to(device).eval()

# Deformable DETR and its matching image processor.
processor = AutoImageProcessor.from_pretrained(
    "SenseTime/deformable-detr",
    use_fast=False,
)
detr = AutoModelForObjectDetection.from_pretrained(
    "SenseTime/deformable-detr"
)
detr.to(device).eval()
36
 
37
+ # -------------------------------------------------
38
+ # Utility Functions
39
+ # -------------------------------------------------
40
 
41
def compute_iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Each box is indexable as ``(x1, y1, x2, y2)``.  The result is a plain
    ``float``; it is ``0.0`` when the boxes do not overlap or when the
    union area is not positive (degenerate or malformed boxes).
    """
    left = max(box1[0], box2[0])
    top = max(box1[1], box2[1])
    right = min(box1[2], box2[2])
    bottom = min(box1[3], box2[3])

    inter = max(0, right - left) * max(0, bottom - top)
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = area1 + area2 - inter

    # Guard the division explicitly instead of adding an epsilon to the
    # denominator: the epsilon skews every result slightly downwards, so
    # identical boxes never reach 1.0 and an overlap of exactly 0.5 fails a
    # `>= 0.5` check.
    if union <= 0:
        return 0.0
    return float(inter / union)
50
 
51
def draw_boxes(image, detections):
    """Draw each detection's box and label onto a copy of *image*.

    *detections* is an iterable of dicts with a ``"box"`` entry
    (``x1, y1, x2, y2``) and a ``"label"`` string.  Returns a new PIL image.
    """
    canvas = np.array(image)
    for det in detections:
        x1, y1, x2, y2 = map(int, det["box"])
        cv2.rectangle(canvas, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # cv2.putText anchors text at its bottom-left corner.  Placing it at
        # y1 - 6 pushes the label off the image for boxes near the top edge,
        # so fall back to just inside the box in that case.
        text_y = y1 - 6 if y1 - 6 >= 10 else y1 + 14
        text_x = max(x1, 0)
        cv2.putText(
            canvas, det["label"], (text_x, text_y),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1,
        )
    return Image.fromarray(canvas)
62
 
63
+ # -------------------------------------------------
64
+ # Model Inference
65
+ # -------------------------------------------------
66
+
67
def yolo_detect(image):
    """Run the YOLO model on *image* and return one dict per detected box.

    Each dict holds the box as a NumPy ``xyxy`` array under ``"box"`` and
    the source model tag ``"YOLO"`` under ``"model"``.
    """
    first_result = yolo(image)[0]
    return [
        {"box": det.xyxy[0].cpu().numpy(), "model": "YOLO"}
        for det in first_result.boxes
    ]
76
+
77
def frcnn_detect(image, score_thresh=0.6):
    """Run Faster R-CNN on *image* and return the confident boxes.

    Args:
        image: Input image convertible with ``np.array``.
            NOTE(review): assumes an (H, W, 3) layout - confirm.
        score_thresh: Detections must score strictly above this value.

    Returns:
        A list of dicts with the box as a NumPy array under ``"box"`` and
        the source model tag ``"FRCNN"`` under ``"model"``.
    """
    # Scale pixels to [0, 1], move channels first and add a batch dimension.
    pixels = torch.tensor(np.array(image) / 255.).permute(2, 0, 1).float()
    batch = pixels.unsqueeze(0).to(device)

    with torch.no_grad():
        out = frcnn(batch)[0]

    # Filter on the device and copy the survivors to the host in one go
    # rather than synchronising once per box.
    keep = out["scores"] > score_thresh
    boxes = out["boxes"][keep].cpu().numpy()
    return [{"box": box, "model": "FRCNN"} for box in boxes]
92
+
93
def detr_detect(image, threshold=0.7):
    """Run Deformable DETR on *image* and return the confident boxes.

    Args:
        image: Input image.
            NOTE(review): assumes a PIL image whose ``size`` is
            (width, height) - confirm against callers.
        threshold: Score threshold forwarded to the processor's
            post-processing step.

    Returns:
        A list of dicts with the box as a NumPy array under ``"box"`` and
        the source model tag ``"DETR"`` under ``"model"``.
    """
    inputs = processor(images=image, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = detr(**inputs)

    # Reverse image.size so boxes are rescaled to the original image.
    target_sizes = torch.tensor([image.size[::-1]]).to(device)
    results = processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=threshold
    )[0]

    # One device-to-host copy for all boxes instead of one per box.
    boxes = results["boxes"].cpu().numpy()
    return [{"box": box, "model": "DETR"} for box in boxes]
111
+
112
+ # -------------------------------------------------
113
  # HARD VOTING
114
+ # -------------------------------------------------
115
 
116
def hard_vote(detections, vote_thresh=2, iou_thresh=0.5, dedup_thresh=0.8):
    """Keep detections that at least *vote_thresh* different models agree on.

    Args:
        detections: Dicts with a ``"box"`` (``x1, y1, x2, y2``) and a
            ``"model"`` tag, pooled from all detectors.
        vote_thresh: Minimum number of distinct models that must agree.
        iou_thresh: Minimum IoU for a box from another model to count as
            agreeing with the current one.
        dedup_thresh: An averaged box is dropped when its IoU with an
            already kept box exceeds this value.

    Returns:
        A list of dicts with the averaged ``"box"`` and a ``"label"`` of
        the form ``"Ensemble (<n models>)"``.
    """
    candidates = []
    for det in detections:
        # The detection itself plus every overlapping box from other models.
        supporters = [det] + [
            other for other in detections
            if other["model"] != det["model"]
            and compute_iou(det["box"], other["box"]) >= iou_thresh
        ]
        models = {s["model"] for s in supporters}
        if len(models) < vote_thresh:
            continue
        candidates.append({
            "box": np.mean([s["box"] for s in supporters], axis=0),
            "label": f"Ensemble ({len(models)})",
        })

    # Every member of a cluster yields its own averaged box; keep only the
    # first of each group of near-identical boxes.
    unique = []
    for cand in candidates:
        if not any(
            compute_iou(cand["box"], kept["box"]) > dedup_thresh
            for kept in unique
        ):
            unique.append(cand)
    return unique
141
+
142
+ # -------------------------------------------------
143
  # LIVE FRAME FUNCTION
144
+ # -------------------------------------------------
145
 
146
def live_detect(frame):
    """Run all three detectors on one webcam frame and return the annotated frame.

    *frame* is the image array handed over by the Gradio image input.
    Returns a NumPy array with the ensemble boxes drawn on it, or ``None``
    when no frame was supplied.
    """
    # The callback can fire before the webcam has delivered a frame;
    # Image.fromarray(None) would raise in that case.
    if frame is None:
        return None

    image = Image.fromarray(frame)
    detections = yolo_detect(image) + frcnn_detect(image) + detr_detect(image)
    voted = hard_vote(detections)
    return np.array(draw_boxes(image, voted))
159
+
160
+ # -------------------------------------------------
161
+ # Gradio Interface (Webcam)
162
+ # -------------------------------------------------
163
 
164
# Browser webcam UI: every streamed frame is passed through live_detect.
# NOTE(review): `source="webcam"` is the Gradio 3 spelling; newer Gradio
# releases expect `sources=[...]` - confirm against the pinned version.
demo = gr.Interface(
    title="Live Object Detection Hard Voting Ensemble",
    description="YOLOv8 + Faster R-CNN + Deformable DETR\nBrowser-based webcam with IoU-based hard voting.",
    fn=live_detect,
    inputs=gr.Image(source="webcam", streaming=True),
    outputs=gr.Image(),
    live=True,
)

demo.launch()