Spaces:
Paused
Paused
Zhen Ye
committed on
Commit
·
65dd451
1
Parent(s):
cac69cc
git commit -m "Handle grounding dino post-process API differences"
Browse files
- demo.html +42 -43
- models/detectors/grounding_dino.py +23 -7
demo.html
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
<head>
|
| 5 |
<meta charset="UTF-8">
|
| 6 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 7 |
-
<title>
|
| 8 |
<style>
|
| 9 |
* {
|
| 10 |
margin: 0;
|
|
@@ -13,8 +13,9 @@
|
|
| 13 |
}
|
| 14 |
|
| 15 |
body {
|
| 16 |
-
font-family:
|
| 17 |
-
background: linear-gradient(
|
|
|
|
| 18 |
min-height: 100vh;
|
| 19 |
padding: 20px;
|
| 20 |
}
|
|
@@ -25,17 +26,17 @@
|
|
| 25 |
}
|
| 26 |
|
| 27 |
h1 {
|
| 28 |
-
color:
|
| 29 |
text-align: center;
|
| 30 |
margin-bottom: 30px;
|
| 31 |
font-size: 2.5rem;
|
| 32 |
-
|
| 33 |
}
|
| 34 |
|
| 35 |
.main-card {
|
| 36 |
-
background:
|
| 37 |
border-radius: 16px;
|
| 38 |
-
box-shadow: 0
|
| 39 |
padding: 40px;
|
| 40 |
}
|
| 41 |
|
|
@@ -60,22 +61,23 @@
|
|
| 60 |
.mode-card {
|
| 61 |
position: relative;
|
| 62 |
padding: 20px;
|
| 63 |
-
border:
|
| 64 |
border-radius: 12px;
|
| 65 |
cursor: pointer;
|
| 66 |
transition: all 0.3s ease;
|
| 67 |
text-align: center;
|
|
|
|
| 68 |
}
|
| 69 |
|
| 70 |
.mode-card:hover {
|
| 71 |
-
border-color: #
|
| 72 |
transform: translateY(-2px);
|
| 73 |
-
box-shadow: 0
|
| 74 |
}
|
| 75 |
|
| 76 |
.mode-card.selected {
|
| 77 |
-
border-color: #
|
| 78 |
-
background: #
|
| 79 |
}
|
| 80 |
|
| 81 |
.mode-card.disabled {
|
|
@@ -89,8 +91,7 @@
|
|
| 89 |
}
|
| 90 |
|
| 91 |
.mode-icon {
|
| 92 |
-
|
| 93 |
-
margin-bottom: 10px;
|
| 94 |
}
|
| 95 |
|
| 96 |
.mode-title {
|
|
@@ -102,8 +103,8 @@
|
|
| 102 |
.mode-badge {
|
| 103 |
display: inline-block;
|
| 104 |
padding: 4px 8px;
|
| 105 |
-
background: #
|
| 106 |
-
color:
|
| 107 |
font-size: 0.7rem;
|
| 108 |
border-radius: 4px;
|
| 109 |
font-weight: 600;
|
|
@@ -126,16 +127,17 @@
|
|
| 126 |
.input-group select {
|
| 127 |
width: 100%;
|
| 128 |
padding: 12px;
|
| 129 |
-
border:
|
| 130 |
border-radius: 8px;
|
| 131 |
font-size: 1rem;
|
| 132 |
transition: border-color 0.3s;
|
|
|
|
| 133 |
}
|
| 134 |
|
| 135 |
.input-group input[type="text"]:focus,
|
| 136 |
.input-group select:focus {
|
| 137 |
outline: none;
|
| 138 |
-
border-color: #
|
| 139 |
}
|
| 140 |
|
| 141 |
.file-input-wrapper {
|
|
@@ -147,8 +149,8 @@
|
|
| 147 |
.file-input-label {
|
| 148 |
display: block;
|
| 149 |
padding: 15px;
|
| 150 |
-
background: #
|
| 151 |
-
border:
|
| 152 |
border-radius: 8px;
|
| 153 |
text-align: center;
|
| 154 |
cursor: pointer;
|
|
@@ -156,13 +158,13 @@
|
|
| 156 |
}
|
| 157 |
|
| 158 |
.file-input-label:hover {
|
| 159 |
-
border-color: #
|
| 160 |
-
background: #
|
| 161 |
}
|
| 162 |
|
| 163 |
.file-input-label.has-file {
|
| 164 |
-
border-color: #
|
| 165 |
-
background: #
|
| 166 |
}
|
| 167 |
|
| 168 |
input[type="file"] {
|
|
@@ -185,13 +187,13 @@
|
|
| 185 |
}
|
| 186 |
|
| 187 |
.btn-primary {
|
| 188 |
-
background:
|
| 189 |
-
color:
|
| 190 |
}
|
| 191 |
|
| 192 |
.btn-primary:hover:not(:disabled) {
|
| 193 |
transform: translateY(-2px);
|
| 194 |
-
box-shadow: 0
|
| 195 |
}
|
| 196 |
|
| 197 |
.btn:disabled {
|
|
@@ -232,8 +234,8 @@
|
|
| 232 |
.download-btn {
|
| 233 |
margin-top: 12px;
|
| 234 |
padding: 10px 16px;
|
| 235 |
-
background: #
|
| 236 |
-
color:
|
| 237 |
text-decoration: none;
|
| 238 |
border-radius: 6px;
|
| 239 |
display: inline-block;
|
|
@@ -241,7 +243,7 @@
|
|
| 241 |
}
|
| 242 |
|
| 243 |
.download-btn:hover {
|
| 244 |
-
background: #
|
| 245 |
}
|
| 246 |
|
| 247 |
/* Loading spinner */
|
|
@@ -256,8 +258,8 @@
|
|
| 256 |
}
|
| 257 |
|
| 258 |
.spinner {
|
| 259 |
-
border: 4px solid #
|
| 260 |
-
border-top: 4px solid #
|
| 261 |
border-radius: 50%;
|
| 262 |
width: 40px;
|
| 263 |
height: 40px;
|
|
@@ -283,7 +285,7 @@
|
|
| 283 |
top: 0;
|
| 284 |
width: 100%;
|
| 285 |
height: 100%;
|
| 286 |
-
background: rgba(
|
| 287 |
align-items: center;
|
| 288 |
justify-content: center;
|
| 289 |
}
|
|
@@ -312,8 +314,8 @@
|
|
| 312 |
|
| 313 |
.modal-btn {
|
| 314 |
padding: 10px 24px;
|
| 315 |
-
background: #
|
| 316 |
-
color:
|
| 317 |
border: none;
|
| 318 |
border-radius: 6px;
|
| 319 |
cursor: pointer;
|
|
@@ -321,13 +323,13 @@
|
|
| 321 |
}
|
| 322 |
|
| 323 |
.modal-btn:hover {
|
| 324 |
-
background: #
|
| 325 |
}
|
| 326 |
</style>
|
| 327 |
</head>
|
| 328 |
<body>
|
| 329 |
<div class="container">
|
| 330 |
-
<h1
|
| 331 |
|
| 332 |
<div class="main-card">
|
| 333 |
<!-- Mode Selection -->
|
|
@@ -336,19 +338,16 @@
|
|
| 336 |
<div class="mode-selector">
|
| 337 |
<label class="mode-card selected">
|
| 338 |
<input type="radio" name="mode" value="object_detection" checked>
|
| 339 |
-
<div class="mode-icon">🎯</div>
|
| 340 |
<div class="mode-title">Object Detection</div>
|
| 341 |
</label>
|
| 342 |
|
| 343 |
<label class="mode-card">
|
| 344 |
<input type="radio" name="mode" value="segmentation">
|
| 345 |
-
<div class="mode-icon">🎨</div>
|
| 346 |
<div class="mode-title">Segmentation</div>
|
| 347 |
</label>
|
| 348 |
|
| 349 |
<label class="mode-card disabled">
|
| 350 |
<input type="radio" name="mode" value="drone_detection">
|
| 351 |
-
<div class="mode-icon">🚁</div>
|
| 352 |
<div class="mode-title">Drone Detection</div>
|
| 353 |
<span class="mode-badge">COMING SOON</span>
|
| 354 |
</label>
|
|
@@ -399,7 +398,7 @@
|
|
| 399 |
<label>3. Upload Video</label>
|
| 400 |
<div class="file-input-wrapper">
|
| 401 |
<label class="file-input-label" id="fileLabel" for="videoFile">
|
| 402 |
-
|
| 403 |
</label>
|
| 404 |
<input type="file" id="videoFile" accept="video/*">
|
| 405 |
</div>
|
|
@@ -409,7 +408,7 @@
|
|
| 409 |
<!-- Process Button -->
|
| 410 |
<div class="section">
|
| 411 |
<button class="btn btn-primary" id="processBtn" disabled>
|
| 412 |
-
|
| 413 |
</button>
|
| 414 |
</div>
|
| 415 |
|
|
@@ -434,7 +433,7 @@
|
|
| 434 |
<div class="video-card-body">
|
| 435 |
<video id="processedVideo" controls autoplay loop></video>
|
| 436 |
<a id="downloadBtn" class="download-btn" download="processed.mp4">
|
| 437 |
-
|
| 438 |
</a>
|
| 439 |
</div>
|
| 440 |
</div>
|
|
|
|
| 4 |
<head>
|
| 5 |
<meta charset="UTF-8">
|
| 6 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 7 |
+
<title>Perception System</title>
|
| 8 |
<style>
|
| 9 |
* {
|
| 10 |
margin: 0;
|
|
|
|
| 13 |
}
|
| 14 |
|
| 15 |
body {
|
| 16 |
+
font-family: "IBM Plex Sans", "Avenir Next", "Helvetica Neue", sans-serif;
|
| 17 |
+
background: linear-gradient(180deg, #f6f7f9 0%, #eef1f4 100%);
|
| 18 |
+
color: #1f2933;
|
| 19 |
min-height: 100vh;
|
| 20 |
padding: 20px;
|
| 21 |
}
|
|
|
|
| 26 |
}
|
| 27 |
|
| 28 |
h1 {
|
| 29 |
+
color: #1f2933;
|
| 30 |
text-align: center;
|
| 31 |
margin-bottom: 30px;
|
| 32 |
font-size: 2.5rem;
|
| 33 |
+
letter-spacing: 0.5px;
|
| 34 |
}
|
| 35 |
|
| 36 |
.main-card {
|
| 37 |
+
background: #ffffff;
|
| 38 |
border-radius: 16px;
|
| 39 |
+
box-shadow: 0 18px 40px rgba(16, 24, 40, 0.12);
|
| 40 |
padding: 40px;
|
| 41 |
}
|
| 42 |
|
|
|
|
| 61 |
.mode-card {
|
| 62 |
position: relative;
|
| 63 |
padding: 20px;
|
| 64 |
+
border: 1px solid #d6dbe0;
|
| 65 |
border-radius: 12px;
|
| 66 |
cursor: pointer;
|
| 67 |
transition: all 0.3s ease;
|
| 68 |
text-align: center;
|
| 69 |
+
background: #f9fafb;
|
| 70 |
}
|
| 71 |
|
| 72 |
.mode-card:hover {
|
| 73 |
+
border-color: #4b5563;
|
| 74 |
transform: translateY(-2px);
|
| 75 |
+
box-shadow: 0 6px 16px rgba(16, 24, 40, 0.12);
|
| 76 |
}
|
| 77 |
|
| 78 |
.mode-card.selected {
|
| 79 |
+
border-color: #1f2933;
|
| 80 |
+
background: #eef2f6;
|
| 81 |
}
|
| 82 |
|
| 83 |
.mode-card.disabled {
|
|
|
|
| 91 |
}
|
| 92 |
|
| 93 |
.mode-icon {
|
| 94 |
+
display: none;
|
|
|
|
| 95 |
}
|
| 96 |
|
| 97 |
.mode-title {
|
|
|
|
| 103 |
.mode-badge {
|
| 104 |
display: inline-block;
|
| 105 |
padding: 4px 8px;
|
| 106 |
+
background: #6b7280;
|
| 107 |
+
color: #f9fafb;
|
| 108 |
font-size: 0.7rem;
|
| 109 |
border-radius: 4px;
|
| 110 |
font-weight: 600;
|
|
|
|
| 127 |
.input-group select {
|
| 128 |
width: 100%;
|
| 129 |
padding: 12px;
|
| 130 |
+
border: 1px solid #d6dbe0;
|
| 131 |
border-radius: 8px;
|
| 132 |
font-size: 1rem;
|
| 133 |
transition: border-color 0.3s;
|
| 134 |
+
background: #ffffff;
|
| 135 |
}
|
| 136 |
|
| 137 |
.input-group input[type="text"]:focus,
|
| 138 |
.input-group select:focus {
|
| 139 |
outline: none;
|
| 140 |
+
border-color: #4b5563;
|
| 141 |
}
|
| 142 |
|
| 143 |
.file-input-wrapper {
|
|
|
|
| 149 |
.file-input-label {
|
| 150 |
display: block;
|
| 151 |
padding: 15px;
|
| 152 |
+
background: #f3f4f6;
|
| 153 |
+
border: 1px dashed #bfc5cc;
|
| 154 |
border-radius: 8px;
|
| 155 |
text-align: center;
|
| 156 |
cursor: pointer;
|
|
|
|
| 158 |
}
|
| 159 |
|
| 160 |
.file-input-label:hover {
|
| 161 |
+
border-color: #4b5563;
|
| 162 |
+
background: #eceff3;
|
| 163 |
}
|
| 164 |
|
| 165 |
.file-input-label.has-file {
|
| 166 |
+
border-color: #1f2933;
|
| 167 |
+
background: #e8edf2;
|
| 168 |
}
|
| 169 |
|
| 170 |
input[type="file"] {
|
|
|
|
| 187 |
}
|
| 188 |
|
| 189 |
.btn-primary {
|
| 190 |
+
background: #1f2933;
|
| 191 |
+
color: #f9fafb;
|
| 192 |
}
|
| 193 |
|
| 194 |
.btn-primary:hover:not(:disabled) {
|
| 195 |
transform: translateY(-2px);
|
| 196 |
+
box-shadow: 0 6px 16px rgba(16, 24, 40, 0.2);
|
| 197 |
}
|
| 198 |
|
| 199 |
.btn:disabled {
|
|
|
|
| 234 |
.download-btn {
|
| 235 |
margin-top: 12px;
|
| 236 |
padding: 10px 16px;
|
| 237 |
+
background: #374151;
|
| 238 |
+
color: #f9fafb;
|
| 239 |
text-decoration: none;
|
| 240 |
border-radius: 6px;
|
| 241 |
display: inline-block;
|
|
|
|
| 243 |
}
|
| 244 |
|
| 245 |
.download-btn:hover {
|
| 246 |
+
background: #1f2933;
|
| 247 |
}
|
| 248 |
|
| 249 |
/* Loading spinner */
|
|
|
|
| 258 |
}
|
| 259 |
|
| 260 |
.spinner {
|
| 261 |
+
border: 4px solid #e5e7eb;
|
| 262 |
+
border-top: 4px solid #1f2933;
|
| 263 |
border-radius: 50%;
|
| 264 |
width: 40px;
|
| 265 |
height: 40px;
|
|
|
|
| 285 |
top: 0;
|
| 286 |
width: 100%;
|
| 287 |
height: 100%;
|
| 288 |
+
background: rgba(15, 23, 42, 0.5);
|
| 289 |
align-items: center;
|
| 290 |
justify-content: center;
|
| 291 |
}
|
|
|
|
| 314 |
|
| 315 |
.modal-btn {
|
| 316 |
padding: 10px 24px;
|
| 317 |
+
background: #1f2933;
|
| 318 |
+
color: #f9fafb;
|
| 319 |
border: none;
|
| 320 |
border-radius: 6px;
|
| 321 |
cursor: pointer;
|
|
|
|
| 323 |
}
|
| 324 |
|
| 325 |
.modal-btn:hover {
|
| 326 |
+
background: #111827;
|
| 327 |
}
|
| 328 |
</style>
|
| 329 |
</head>
|
| 330 |
<body>
|
| 331 |
<div class="container">
|
| 332 |
+
<h1>Perception System</h1>
|
| 333 |
|
| 334 |
<div class="main-card">
|
| 335 |
<!-- Mode Selection -->
|
|
|
|
| 338 |
<div class="mode-selector">
|
| 339 |
<label class="mode-card selected">
|
| 340 |
<input type="radio" name="mode" value="object_detection" checked>
|
|
|
|
| 341 |
<div class="mode-title">Object Detection</div>
|
| 342 |
</label>
|
| 343 |
|
| 344 |
<label class="mode-card">
|
| 345 |
<input type="radio" name="mode" value="segmentation">
|
|
|
|
| 346 |
<div class="mode-title">Segmentation</div>
|
| 347 |
</label>
|
| 348 |
|
| 349 |
<label class="mode-card disabled">
|
| 350 |
<input type="radio" name="mode" value="drone_detection">
|
|
|
|
| 351 |
<div class="mode-title">Drone Detection</div>
|
| 352 |
<span class="mode-badge">COMING SOON</span>
|
| 353 |
</label>
|
|
|
|
| 398 |
<label>3. Upload Video</label>
|
| 399 |
<div class="file-input-wrapper">
|
| 400 |
<label class="file-input-label" id="fileLabel" for="videoFile">
|
| 401 |
+
Click to select video file (MP4)
|
| 402 |
</label>
|
| 403 |
<input type="file" id="videoFile" accept="video/*">
|
| 404 |
</div>
|
|
|
|
| 408 |
<!-- Process Button -->
|
| 409 |
<div class="section">
|
| 410 |
<button class="btn btn-primary" id="processBtn" disabled>
|
| 411 |
+
Process Video
|
| 412 |
</button>
|
| 413 |
</div>
|
| 414 |
|
|
|
|
| 433 |
<div class="video-card-body">
|
| 434 |
<video id="processedVideo" controls autoplay loop></video>
|
| 435 |
<a id="downloadBtn" class="download-btn" download="processed.mp4">
|
| 436 |
+
Download Processed Video
|
| 437 |
</a>
|
| 438 |
</div>
|
| 439 |
</div>
|
models/detectors/grounding_dino.py
CHANGED
|
@@ -37,13 +37,29 @@ class GroundingDinoDetector(ObjectDetector):
|
|
| 37 |
with torch.no_grad():
|
| 38 |
outputs = self.model(**inputs)
|
| 39 |
target_sizes = torch.tensor([frame.shape[:2]], device=self.device)
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
boxes = processed["boxes"].cpu().numpy()
|
| 48 |
scores = processed["scores"].cpu().tolist()
|
| 49 |
label_names = list(processed.get("labels") or [])
|
|
|
|
| 37 |
with torch.no_grad():
|
| 38 |
outputs = self.model(**inputs)
|
| 39 |
target_sizes = torch.tensor([frame.shape[:2]], device=self.device)
|
| 40 |
+
try:
|
| 41 |
+
processed = self.processor.post_process_grounded_object_detection(
|
| 42 |
+
outputs,
|
| 43 |
+
inputs["input_ids"],
|
| 44 |
+
box_threshold=self.box_threshold,
|
| 45 |
+
text_threshold=self.text_threshold,
|
| 46 |
+
target_sizes=target_sizes,
|
| 47 |
+
)[0]
|
| 48 |
+
except TypeError:
|
| 49 |
+
try:
|
| 50 |
+
processed = self.processor.post_process_grounded_object_detection(
|
| 51 |
+
outputs,
|
| 52 |
+
inputs["input_ids"],
|
| 53 |
+
threshold=self.box_threshold,
|
| 54 |
+
text_threshold=self.text_threshold,
|
| 55 |
+
target_sizes=target_sizes,
|
| 56 |
+
)[0]
|
| 57 |
+
except TypeError:
|
| 58 |
+
processed = self.processor.post_process_grounded_object_detection(
|
| 59 |
+
outputs,
|
| 60 |
+
inputs["input_ids"],
|
| 61 |
+
target_sizes=target_sizes,
|
| 62 |
+
)[0]
|
| 63 |
boxes = processed["boxes"].cpu().numpy()
|
| 64 |
scores = processed["scores"].cpu().tolist()
|
| 65 |
label_names = list(processed.get("labels") or [])
|