Zhen Ye committed on
Commit
65dd451
·
1 Parent(s): cac69cc

git commit -m "Handle grounding dino post-process API differences"

Browse files
Files changed (2) hide show
  1. demo.html +42 -43
  2. models/detectors/grounding_dino.py +23 -7
demo.html CHANGED
@@ -4,7 +4,7 @@
4
  <head>
5
  <meta charset="UTF-8">
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
- <title>Video Object Detection</title>
8
  <style>
9
  * {
10
  margin: 0;
@@ -13,8 +13,9 @@
13
  }
14
 
15
  body {
16
- font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Helvetica', 'Arial', sans-serif;
17
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
 
18
  min-height: 100vh;
19
  padding: 20px;
20
  }
@@ -25,17 +26,17 @@
25
  }
26
 
27
  h1 {
28
- color: white;
29
  text-align: center;
30
  margin-bottom: 30px;
31
  font-size: 2.5rem;
32
- text-shadow: 2px 2px 4px rgba(0,0,0,0.2);
33
  }
34
 
35
  .main-card {
36
- background: white;
37
  border-radius: 16px;
38
- box-shadow: 0 20px 60px rgba(0,0,0,0.3);
39
  padding: 40px;
40
  }
41
 
@@ -60,22 +61,23 @@
60
  .mode-card {
61
  position: relative;
62
  padding: 20px;
63
- border: 2px solid #e0e0e0;
64
  border-radius: 12px;
65
  cursor: pointer;
66
  transition: all 0.3s ease;
67
  text-align: center;
 
68
  }
69
 
70
  .mode-card:hover {
71
- border-color: #667eea;
72
  transform: translateY(-2px);
73
- box-shadow: 0 4px 12px rgba(102, 126, 234, 0.2);
74
  }
75
 
76
  .mode-card.selected {
77
- border-color: #667eea;
78
- background: #f0f4ff;
79
  }
80
 
81
  .mode-card.disabled {
@@ -89,8 +91,7 @@
89
  }
90
 
91
  .mode-icon {
92
- font-size: 2rem;
93
- margin-bottom: 10px;
94
  }
95
 
96
  .mode-title {
@@ -102,8 +103,8 @@
102
  .mode-badge {
103
  display: inline-block;
104
  padding: 4px 8px;
105
- background: #ffc107;
106
- color: white;
107
  font-size: 0.7rem;
108
  border-radius: 4px;
109
  font-weight: 600;
@@ -126,16 +127,17 @@
126
  .input-group select {
127
  width: 100%;
128
  padding: 12px;
129
- border: 2px solid #e0e0e0;
130
  border-radius: 8px;
131
  font-size: 1rem;
132
  transition: border-color 0.3s;
 
133
  }
134
 
135
  .input-group input[type="text"]:focus,
136
  .input-group select:focus {
137
  outline: none;
138
- border-color: #667eea;
139
  }
140
 
141
  .file-input-wrapper {
@@ -147,8 +149,8 @@
147
  .file-input-label {
148
  display: block;
149
  padding: 15px;
150
- background: #f8f9fa;
151
- border: 2px dashed #ccc;
152
  border-radius: 8px;
153
  text-align: center;
154
  cursor: pointer;
@@ -156,13 +158,13 @@
156
  }
157
 
158
  .file-input-label:hover {
159
- border-color: #667eea;
160
- background: #f0f4ff;
161
  }
162
 
163
  .file-input-label.has-file {
164
- border-color: #28a745;
165
- background: #d4edda;
166
  }
167
 
168
  input[type="file"] {
@@ -185,13 +187,13 @@
185
  }
186
 
187
  .btn-primary {
188
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
189
- color: white;
190
  }
191
 
192
  .btn-primary:hover:not(:disabled) {
193
  transform: translateY(-2px);
194
- box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4);
195
  }
196
 
197
  .btn:disabled {
@@ -232,8 +234,8 @@
232
  .download-btn {
233
  margin-top: 12px;
234
  padding: 10px 16px;
235
- background: #28a745;
236
- color: white;
237
  text-decoration: none;
238
  border-radius: 6px;
239
  display: inline-block;
@@ -241,7 +243,7 @@
241
  }
242
 
243
  .download-btn:hover {
244
- background: #218838;
245
  }
246
 
247
  /* Loading spinner */
@@ -256,8 +258,8 @@
256
  }
257
 
258
  .spinner {
259
- border: 4px solid #f3f3f3;
260
- border-top: 4px solid #667eea;
261
  border-radius: 50%;
262
  width: 40px;
263
  height: 40px;
@@ -283,7 +285,7 @@
283
  top: 0;
284
  width: 100%;
285
  height: 100%;
286
- background: rgba(0,0,0,0.5);
287
  align-items: center;
288
  justify-content: center;
289
  }
@@ -312,8 +314,8 @@
312
 
313
  .modal-btn {
314
  padding: 10px 24px;
315
- background: #667eea;
316
- color: white;
317
  border: none;
318
  border-radius: 6px;
319
  cursor: pointer;
@@ -321,13 +323,13 @@
321
  }
322
 
323
  .modal-btn:hover {
324
- background: #5568d3;
325
  }
326
  </style>
327
  </head>
328
  <body>
329
  <div class="container">
330
- <h1>🎥 Video Object Detection</h1>
331
 
332
  <div class="main-card">
333
  <!-- Mode Selection -->
@@ -336,19 +338,16 @@
336
  <div class="mode-selector">
337
  <label class="mode-card selected">
338
  <input type="radio" name="mode" value="object_detection" checked>
339
- <div class="mode-icon">🎯</div>
340
  <div class="mode-title">Object Detection</div>
341
  </label>
342
 
343
  <label class="mode-card">
344
  <input type="radio" name="mode" value="segmentation">
345
- <div class="mode-icon">🎨</div>
346
  <div class="mode-title">Segmentation</div>
347
  </label>
348
 
349
  <label class="mode-card disabled">
350
  <input type="radio" name="mode" value="drone_detection">
351
- <div class="mode-icon">🚁</div>
352
  <div class="mode-title">Drone Detection</div>
353
  <span class="mode-badge">COMING SOON</span>
354
  </label>
@@ -399,7 +398,7 @@
399
  <label>3. Upload Video</label>
400
  <div class="file-input-wrapper">
401
  <label class="file-input-label" id="fileLabel" for="videoFile">
402
- 📁 Click to select video file (MP4)
403
  </label>
404
  <input type="file" id="videoFile" accept="video/*">
405
  </div>
@@ -409,7 +408,7 @@
409
  <!-- Process Button -->
410
  <div class="section">
411
  <button class="btn btn-primary" id="processBtn" disabled>
412
- 🚀 Process Video
413
  </button>
414
  </div>
415
 
@@ -434,7 +433,7 @@
434
  <div class="video-card-body">
435
  <video id="processedVideo" controls autoplay loop></video>
436
  <a id="downloadBtn" class="download-btn" download="processed.mp4">
437
- ⬇️ Download Processed Video
438
  </a>
439
  </div>
440
  </div>
 
4
  <head>
5
  <meta charset="UTF-8">
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>Perception System</title>
8
  <style>
9
  * {
10
  margin: 0;
 
13
  }
14
 
15
  body {
16
+ font-family: "IBM Plex Sans", "Avenir Next", "Helvetica Neue", sans-serif;
17
+ background: linear-gradient(180deg, #f6f7f9 0%, #eef1f4 100%);
18
+ color: #1f2933;
19
  min-height: 100vh;
20
  padding: 20px;
21
  }
 
26
  }
27
 
28
  h1 {
29
+ color: #1f2933;
30
  text-align: center;
31
  margin-bottom: 30px;
32
  font-size: 2.5rem;
33
+ letter-spacing: 0.5px;
34
  }
35
 
36
  .main-card {
37
+ background: #ffffff;
38
  border-radius: 16px;
39
+ box-shadow: 0 18px 40px rgba(16, 24, 40, 0.12);
40
  padding: 40px;
41
  }
42
 
 
61
  .mode-card {
62
  position: relative;
63
  padding: 20px;
64
+ border: 1px solid #d6dbe0;
65
  border-radius: 12px;
66
  cursor: pointer;
67
  transition: all 0.3s ease;
68
  text-align: center;
69
+ background: #f9fafb;
70
  }
71
 
72
  .mode-card:hover {
73
+ border-color: #4b5563;
74
  transform: translateY(-2px);
75
+ box-shadow: 0 6px 16px rgba(16, 24, 40, 0.12);
76
  }
77
 
78
  .mode-card.selected {
79
+ border-color: #1f2933;
80
+ background: #eef2f6;
81
  }
82
 
83
  .mode-card.disabled {
 
91
  }
92
 
93
  .mode-icon {
94
+ display: none;
 
95
  }
96
 
97
  .mode-title {
 
103
  .mode-badge {
104
  display: inline-block;
105
  padding: 4px 8px;
106
+ background: #6b7280;
107
+ color: #f9fafb;
108
  font-size: 0.7rem;
109
  border-radius: 4px;
110
  font-weight: 600;
 
127
  .input-group select {
128
  width: 100%;
129
  padding: 12px;
130
+ border: 1px solid #d6dbe0;
131
  border-radius: 8px;
132
  font-size: 1rem;
133
  transition: border-color 0.3s;
134
+ background: #ffffff;
135
  }
136
 
137
  .input-group input[type="text"]:focus,
138
  .input-group select:focus {
139
  outline: none;
140
+ border-color: #4b5563;
141
  }
142
 
143
  .file-input-wrapper {
 
149
  .file-input-label {
150
  display: block;
151
  padding: 15px;
152
+ background: #f3f4f6;
153
+ border: 1px dashed #bfc5cc;
154
  border-radius: 8px;
155
  text-align: center;
156
  cursor: pointer;
 
158
  }
159
 
160
  .file-input-label:hover {
161
+ border-color: #4b5563;
162
+ background: #eceff3;
163
  }
164
 
165
  .file-input-label.has-file {
166
+ border-color: #1f2933;
167
+ background: #e8edf2;
168
  }
169
 
170
  input[type="file"] {
 
187
  }
188
 
189
  .btn-primary {
190
+ background: #1f2933;
191
+ color: #f9fafb;
192
  }
193
 
194
  .btn-primary:hover:not(:disabled) {
195
  transform: translateY(-2px);
196
+ box-shadow: 0 6px 16px rgba(16, 24, 40, 0.2);
197
  }
198
 
199
  .btn:disabled {
 
234
  .download-btn {
235
  margin-top: 12px;
236
  padding: 10px 16px;
237
+ background: #374151;
238
+ color: #f9fafb;
239
  text-decoration: none;
240
  border-radius: 6px;
241
  display: inline-block;
 
243
  }
244
 
245
  .download-btn:hover {
246
+ background: #1f2933;
247
  }
248
 
249
  /* Loading spinner */
 
258
  }
259
 
260
  .spinner {
261
+ border: 4px solid #e5e7eb;
262
+ border-top: 4px solid #1f2933;
263
  border-radius: 50%;
264
  width: 40px;
265
  height: 40px;
 
285
  top: 0;
286
  width: 100%;
287
  height: 100%;
288
+ background: rgba(15, 23, 42, 0.5);
289
  align-items: center;
290
  justify-content: center;
291
  }
 
314
 
315
  .modal-btn {
316
  padding: 10px 24px;
317
+ background: #1f2933;
318
+ color: #f9fafb;
319
  border: none;
320
  border-radius: 6px;
321
  cursor: pointer;
 
323
  }
324
 
325
  .modal-btn:hover {
326
+ background: #111827;
327
  }
328
  </style>
329
  </head>
330
  <body>
331
  <div class="container">
332
+ <h1>Perception System</h1>
333
 
334
  <div class="main-card">
335
  <!-- Mode Selection -->
 
338
  <div class="mode-selector">
339
  <label class="mode-card selected">
340
  <input type="radio" name="mode" value="object_detection" checked>
 
341
  <div class="mode-title">Object Detection</div>
342
  </label>
343
 
344
  <label class="mode-card">
345
  <input type="radio" name="mode" value="segmentation">
 
346
  <div class="mode-title">Segmentation</div>
347
  </label>
348
 
349
  <label class="mode-card disabled">
350
  <input type="radio" name="mode" value="drone_detection">
 
351
  <div class="mode-title">Drone Detection</div>
352
  <span class="mode-badge">COMING SOON</span>
353
  </label>
 
398
  <label>3. Upload Video</label>
399
  <div class="file-input-wrapper">
400
  <label class="file-input-label" id="fileLabel" for="videoFile">
401
+ Click to select video file (MP4)
402
  </label>
403
  <input type="file" id="videoFile" accept="video/*">
404
  </div>
 
408
  <!-- Process Button -->
409
  <div class="section">
410
  <button class="btn btn-primary" id="processBtn" disabled>
411
+ Process Video
412
  </button>
413
  </div>
414
 
 
433
  <div class="video-card-body">
434
  <video id="processedVideo" controls autoplay loop></video>
435
  <a id="downloadBtn" class="download-btn" download="processed.mp4">
436
+ Download Processed Video
437
  </a>
438
  </div>
439
  </div>
models/detectors/grounding_dino.py CHANGED
@@ -37,13 +37,29 @@ class GroundingDinoDetector(ObjectDetector):
37
  with torch.no_grad():
38
  outputs = self.model(**inputs)
39
  target_sizes = torch.tensor([frame.shape[:2]], device=self.device)
40
- processed = self.processor.post_process_grounded_object_detection(
41
- outputs,
42
- inputs["input_ids"],
43
- box_threshold=self.box_threshold,
44
- text_threshold=self.text_threshold,
45
- target_sizes=target_sizes,
46
- )[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  boxes = processed["boxes"].cpu().numpy()
48
  scores = processed["scores"].cpu().tolist()
49
  label_names = list(processed.get("labels") or [])
 
37
  with torch.no_grad():
38
  outputs = self.model(**inputs)
39
  target_sizes = torch.tensor([frame.shape[:2]], device=self.device)
40
+ try:
41
+ processed = self.processor.post_process_grounded_object_detection(
42
+ outputs,
43
+ inputs["input_ids"],
44
+ box_threshold=self.box_threshold,
45
+ text_threshold=self.text_threshold,
46
+ target_sizes=target_sizes,
47
+ )[0]
48
+ except TypeError:
49
+ try:
50
+ processed = self.processor.post_process_grounded_object_detection(
51
+ outputs,
52
+ inputs["input_ids"],
53
+ threshold=self.box_threshold,
54
+ text_threshold=self.text_threshold,
55
+ target_sizes=target_sizes,
56
+ )[0]
57
+ except TypeError:
58
+ processed = self.processor.post_process_grounded_object_detection(
59
+ outputs,
60
+ inputs["input_ids"],
61
+ target_sizes=target_sizes,
62
+ )[0]
63
  boxes = processed["boxes"].cpu().numpy()
64
  scores = processed["scores"].cpu().tolist()
65
  label_names = list(processed.get("labels") or [])